Browse Source

Add files via upload

GuHong 4 years ago
parent
commit
7070c282bc

+ 198 - 0
libFM_in_action/DL_model.ipynb

@@ -0,0 +1,198 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using Theano backend.\n",
+      "Using gpu device 0: GeForce GT 740M (CNMeM is enabled with initial size: 80.0% of memory, cuDNN not available)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# -*- coding: utf-8 -*-\n",
+    "\n",
+    "import numpy as np\n",
+    "np.random.seed(2016)\n",
+    "\n",
+    "import os\n",
+    "import glob\n",
+    "import math\n",
+    "import pickle\n",
+    "import datetime\n",
+    "\n",
+    "from keras.layers import Input, Embedding, LSTM, Dense,Flatten, Dropout, merge\n",
+    "from keras.models import Model\n",
+    "\n",
+    "def load_train():\n",
+    "    X_train_uid=[]\n",
+    "    X_train_iid=[]\n",
+    "    Y_train_score=[]\n",
+    "\n",
+    "    path = os.path.join('../scripts',  'train.txt')\n",
+    "    print('Read train data',path)\n",
+    "\n",
+    "    f = open(path, 'r')\n",
+    "    line = f.readline()\n",
+    "    while (1):\n",
+    "        line = f.readline()\n",
+    "        if line == '':\n",
+    "            break\n",
+    "        arr = line.strip().split(',')\n",
+    "        X_train_uid.append(int(arr[0]))\n",
+    "        X_train_iid.append(int(arr[1]))\n",
+    "        Y_train_score.append(float(arr[2]))\n",
+    "    f.close()\n",
+    "    return X_train_uid,X_train_iid,Y_train_score\n",
+    "\n",
+    "def load_test():\n",
+    "    X_test_uid=[]\n",
+    "    X_test_iid=[]\n",
+    "\n",
+    "    path = os.path.join('../scripts',  'test.txt')\n",
+    "    print('Read test data',path)\n",
+    "\n",
+    "    f = open(path, 'r')\n",
+    "    line = f.readline()\n",
+    "    while (1):\n",
+    "        line = f.readline()\n",
+    "        if line == '':\n",
+    "            break\n",
+    "        arr = line.strip().split(',')\n",
+    "        X_test_uid.append(int(arr[0]))\n",
+    "        X_test_iid.append(int(arr[1]))\n",
+    "    f.close()\n",
+    "    return X_test_uid,X_test_iid\n",
+    "\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "('Read train data', '../scripts/train.txt')\n",
+      "load train data OK.\n",
+      "('Read test data', '../scripts/test.txt')\n",
+      "load test data OK.\n"
+     ]
+    }
+   ],
+   "source": [
+    "X_train_uid,X_train_iid,Y_train_score = load_train()\n",
+    "#print len(X_train_uid),X_train_uid[33177260],max(X_train_uid)\n",
+    "#print len(X_train_iid),X_train_iid[33177260],max(X_train_iid)\n",
+    "#print len(Y_train_score),Y_train_score[33177260]\n",
+    "print \"load train data OK.\"\n",
+    "\n",
+    "X_test_uid,X_test_iid = load_test()\n",
+    "#print len(X_test_uid),X_test_uid[100],max(X_test_uid)\n",
+    "#print len(X_test_iid),X_test_iid[100],max(X_test_iid)\n",
+    "print \"load test data OK.\"\n",
+    "\n",
+    "# normalize train date\n",
+    "X_train_uid=np.array(X_train_uid)\n",
+    "X_train_uid=X_train_uid.reshape(X_train_uid.shape[0],1)\n",
+    "\n",
+    "X_train_iid=np.array(X_train_iid)\n",
+    "X_train_iid=X_train_iid.reshape(X_train_iid.shape[0],1)\n",
+    "\n",
+    "Y_train_score = np.array(Y_train_score).astype('float32')\n",
+    "Y_train_score = (Y_train_score)/ 10\n",
+    "\n",
+    "# normalize test date\n",
+    "X_test_uid=np.array(X_test_uid)\n",
+    "X_test_uid=X_test_uid.reshape(X_test_uid.shape[0],1)\n",
+    "\n",
+    "X_test_iid=np.array(X_test_iid)\n",
+    "X_test_iid=X_test_iid.reshape(X_test_iid.shape[0],1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# define model\n",
+    "input_1=Input(shape=(1,), dtype='int32')\n",
+    "input_2=Input(shape=(1,), dtype='int32')\n",
+    "x1=Embedding(output_dim=128, input_dim=40000, input_length=1)(input_1)\n",
+    "x2=Embedding(output_dim=128, input_dim=120, input_length=1)(input_2)\n",
+    "x1=Flatten()(x1)\n",
+    "x2=Flatten()(x2)\n",
+    "x = merge([x1, x2], mode='concat')\n",
+    "x = Dropout(0.2)(x)\n",
+    "x = Dense(512, activation='relu')(x)\n",
+    "x = Dropout(0.2)(x)\n",
+    "x = Dense(64, activation='relu')(x)\n",
+    "x = Dropout(0.2)(x)\n",
+    "out = Dense(1, activation='sigmoid')(x)\n",
+    "model = Model(input=[input_1, input_2], output=out)\n",
+    "model.compile(optimizer='rmsprop',\n",
+    "              loss='mean_squared_error',\n",
+    "              metrics=[])\n",
+    "# train model\n",
+    "model.fit([X_train_uid, X_train_iid], Y_train_score,nb_epoch=10, batch_size=1024*6)\n",
+    "\n",
+    "# predict\n",
+    "Y_test_score = model.predict([X_test_uid, X_test_iid],batch_size=2048)\n",
+    "Y_test_score = Y_test_score * 10\n",
+    "\n",
+    "f=open(\"predict_DL.csv\",\"w\")\n",
+    "#f.write(\"score\\n\")\n",
+    "for i in range(Y_test_score.shape[0]):\n",
+    "    f.write(\"{:1.4f}\".format(Y_test_score[i,0]))\n",
+    "    f.write(\"\\n\")\n",
+    "f.close()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [Root]",
+   "language": "python",
+   "name": "Python [Root]"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}

File diff suppressed because it is too large
+ 641015 - 0
libFM_in_action/predict.libfm


+ 1 - 0
libFM_in_action/sample_submission_MLWARE2.csv

@@ -0,0 +1 @@
+ID,rating

File diff suppressed because it is too large
+ 641016 - 0
libFM_in_action/test_MLWARE2.csv


File diff suppressed because it is too large
+ 958530 - 0
libFM_in_action/train_MLWARE2.csv