{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "from keras.models import load_model\n",
    "\n",
    "import numpy as np\n",
    "import random\n",
    "import json\n",
    "import sys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "corpus length: 109940\n"
     ]
    }
   ],
   "source": [
    "text = open('sayings.txt').read().lower()\n",
    "text = text.replace('\\xa0', ' ').replace('’', \"'\").replace('ﬁ', 'ft').replace('–', '_').replace('”', '\"').replace('‘',\"'\")\n",
    "print('corpus length:', len(text))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['num_chars', 'indices_char', 'maxlen', 'char_indices'])"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d = {}\n",
    "sofar = ''\n",
    "key = None\n",
    "for l in open('chars.js'):\n",
    "    if ' = ' in l:\n",
    "        if key:\n",
    "            d[key] = json.loads(sofar)\n",
    "        key, sofar = l.split(' = ')\n",
    "    else:\n",
    "        sofar += l\n",
    "d[key] = json.loads(sofar)\n",
    "d.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'{\\n  \"0\": \"\\\\n\",\\n  \"1\": \" \",\\n  \"2\": \"!\",\\n  \"3\": \"\\\\\"\",\\n  \"4\": \"\\'\",\\n  \"5\": \"(\",\\n  \"6\": \")\",\\n  \"7\": \",\",\\n  \"8\": \"-\",\\n  \"9\": \".\",\\n  \"10\": \"0\",\\n  \"11\": \"1\",\\n  \"12\": \"2\",\\n  \"13\": \"3\",\\n  \"14\": \"4\",\\n  \"15\": \"5\",\\n  \"16\": \"6\",\\n  \"17\": \"7\",\\n  \"18\": \"8\",\\n  \"19\": \"9\",\\n  \"20\": \":\",\\n  \"21\": \";\",\\n  \"22\": \"=\",\\n  \"23\": \"?\",\\n  \"24\": \"[\",\\n  \"25\": \"]\",\\n  \"26\": \"_\",\\n  \"27\": \"a\",\\n  \"28\": \"b\",\\n  \"29\": \"c\",\\n  \"30\": \"d\",\\n  \"31\": \"e\",\\n  \"32\": \"f\",\\n  \"33\": \"g\",\\n  \"34\": \"h\",\\n  \"35\": \"i\",\\n  \"36\": \"j\",\\n  \"37\": \"k\",\\n  \"38\": \"l\",\\n  \"39\": \"m\",\\n  \"40\": \"n\",\\n  \"41\": \"o\",\\n  \"42\": \"p\",\\n  \"43\": \"q\",\\n  \"44\": \"r\",\\n  \"45\": \"s\",\\n  \"46\": \"t\",\\n  \"47\": \"u\",\\n  \"48\": \"v\",\\n  \"49\": \"w\",\\n  \"50\": \"x\",\\n  \"51\": \"y\",\\n  \"52\": \"z\",\\n  \"53\": \"\\\\u00e4\",\\n  \"54\": \"\\\\u00e6\",\\n  \"55\": \"\\\\u00e9\",\\n  \"56\": \"\\\\u00eb\"\\n}\\n'"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sofar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "total chars: 57\n",
      "nb sequences: 36634\n"
     ]
    }
   ],
   "source": [
    "char_indices = d['char_indices']\n",
    "chars = ''.join(sorted(char_indices.keys()))\n",
    "print('total chars:', len(chars))\n",
    "indices_char = {int(k): v for k, v in d['indices_char'].items()}\n",
    "maxlen = d['maxlen']\n",
    "step = 3\n",
    "sentences = []\n",
    "next_chars = []\n",
    "for i in range(0, len(text) - maxlen, step):\n",
    "    sentences.append(text[i: i + maxlen])\n",
    "    next_chars.append(text[i + maxlen])\n",
    "print('nb sequences:', len(sentences))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((36634, 40, 57),\n",
       " (36634, 57),\n",
       " 'ﬂﬂﬂﬂﬂﬂﬂﬂﬂﬂﬂﬂﬂﬂ//////////////////////////“““““““““““““———————————————————————————///////////////////////////——————————————/////////////——————————————−−−−−−−−−−−−−//////////////——————————————————————————/////////////………………………………………………………………………………………………………………………………………………““““““““““““““//////////////………………………………………………………………………………………………………………………………………………………………………………………………………………………&&&&&&&&&&&&&……………………………………//////////////……………………………………………………………………%%%%%%%%%%%%%“““““““““““““………………………………………………………………………………………………………………………………………………………………………………………………………………………//////////////……………………………………………………………………&&&&&&&&&&&&&â\\x80\\x99â\\x80\\x99â\\x80\\x99â\\x80\\x99â\\x80\\x99â\\x80\\x99â\\x80\\x99â\\x80\\x99â\\x80\\x99â\\x80\\x99â\\x80\\x99â\\x80\\x99â\\x80\\x99\\x99â\\x80â\\x80\\x99â\\x80\\x99â\\x80\\x99â\\x80\\x99â\\x80\\x99â\\x80\\x99â\\x80\\x99â\\x80\\x99â\\x80\\x99â\\x80\\x99â\\x80\\x99â\\x80\\x99\\x80\\x99')"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "errors = ''\n",
    "X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)\n",
    "y = np.zeros((len(sentences), len(chars)), dtype=np.bool)\n",
    "for i, sentence in enumerate(sentences):\n",
    "    for t, char in enumerate(sentence):\n",
    "        if char in char_indices:\n",
    "            X[i, t, char_indices[char]] = 1\n",
    "        else:\n",
    "            errors += char\n",
    "        if next_chars[i] in char_indices:\n",
    "            y[i, char_indices[next_chars[i]]] = 1\n",
    "X.shape, y.shape, errors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = load_model('nietzsche.h5')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "def sample(preds, temperature=1.0):\n",
    "    # helper function to sample an index from a probability array\n",
    "    preds = np.asarray(preds).astype('float64')\n",
    "    preds = np.log(preds) / temperature\n",
    "    exp_preds = np.exp(preds)\n",
    "    preds = exp_preds / np.sum(exp_preds)\n",
    "    probas = np.random.multinomial(1, preds, 1)\n",
    "    return np.argmax(probas)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "--------------------------------------------------\n",
      "Iteration 1\n",
      "Epoch 1/5\n",
      "36634/36634 [==============================] - 18s 498us/step - loss: 1.2576 - acc: 0.6136\n",
      "Epoch 2/5\n",
      "36634/36634 [==============================] - 19s 525us/step - loss: 1.2139 - acc: 0.6268\n",
      "Epoch 3/5\n",
      "36634/36634 [==============================] - 20s 544us/step - loss: 1.1756 - acc: 0.6364\n",
      "Epoch 4/5\n",
      "36634/36634 [==============================] - 21s 577us/step - loss: 1.1449 - acc: 0.6447\n",
      "Epoch 5/5\n",
      "36634/36634 [==============================] - 20s 559us/step - loss: 1.1164 - acc: 0.6539\n",
      "\n",
      "----- diversity: 0.2\n",
      "----- Generating with seed: \"ome to those who wait\n",
      "all things must pa\"\n",
      "ome to those who wait\n",
      "all things must pass.\n",
      "the storm and lover.\n",
      "the restle.\n",
      "the same doctor and action when you want to the storm.\n",
      "the same things are away to truth without and life.\n",
      "be don't be an all in "
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/douwe/Dropbox/Douwe/Proj/notebooks/venv3/lib/python3.5/site-packages/ipykernel_launcher.py:4: RuntimeWarning: divide by zero encountered in log\n",
      "  after removing the cwd from sys.path.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "the same contempt.\n",
      "learn like a storm the same cloud and things man war and best to stards of the storm.\n",
      "the same childrer.\n",
      "the strength.\n",
      "let the rest's not the same problems and best the don't shame to the storm.\n",
      "the same childrer or\n",
      "\n",
      "----- diversity: 0.5\n",
      "----- Generating with seed: \"ome to those who wait\n",
      "all things must pa\"\n",
      "ome to those who wait\n",
      "all things must path is better than takes the beginning, the step.\n",
      "how good and sable that is always richer of the reade the doin and live a problem of the other than the night things hear does not development of a schere and surrog.\n",
      "the liad.\n",
      "a borrow than it is the reading is anything of the little all time for the famed.\n",
      "the you can't bothing to his morer things makes the like things belier.\n",
      "stingour things alwa\n",
      "\n",
      "----- diversity: 1.0\n",
      "----- Generating with seed: \"ome to those who wait\n",
      "all things must pa\"\n",
      "ome to those who wait\n",
      "all things must pain longing from have being particular confquerates out of the public man is and weat\n",
      "the pain.\n",
      "it offertones how down't don't the heat 'effer bark)\n",
      "achievion and accention.\n",
      "man day\n",
      "if ne's abounalwith in every, then egwisonsion are better traufl's empty peached at even if a god, never forge water the straw some the puck the sun interrot than.\n",
      "there engled and noped of the poterntly you never fone,\n",
      "\n",
      "----- diversity: 1.2\n",
      "----- Generating with seed: \"ome to those who wait\n",
      "all things must pa\"\n",
      "ome to those who wait\n",
      "all things must path but in divinerse to twice if the smiling speccard.\n",
      "all a pounds\n",
      "en'we'm. .\n",
      "adversies when the same.\n",
      "never out equial man workhever solloas port is ahil tomoren dies for w.)\n",
      "moteloy sneed must 'gemired to spoin of your abitts, distiec\n",
      "as pach.\n",
      "\n",
      "there's it.\n",
      "don't let the soff pase\n",
      "litiry, say and los a dreamary rich is many reabovy as a great you wally gonify\n",
      "womarwhorher work.\n",
      "hencious temperate\n",
      "\n",
      "--------------------------------------------------\n",
      "Iteration 2\n",
      "Epoch 1/5\n",
      "36634/36634 [==============================] - 24s 643us/step - loss: 1.0959 - acc: 0.6596\n",
      "Epoch 2/5\n",
      "36634/36634 [==============================] - 22s 602us/step - loss: 1.0739 - acc: 0.6671\n",
      "Epoch 3/5\n",
      "36634/36634 [==============================] - 28s 755us/step - loss: 1.0581 - acc: 0.6694\n",
      "Epoch 4/5\n",
      "36634/36634 [==============================] - 30s 809us/step - loss: 1.0390 - acc: 0.6767\n",
      "Epoch 5/5\n",
      "36634/36634 [==============================] - 27s 731us/step - loss: 1.0233 - acc: 0.6822\n",
      "\n",
      "----- diversity: 0.2\n",
      "----- Generating with seed: \"l.\n",
      "sticks and stones may break my bones \"\n",
      "l.\n",
      "sticks and stones may break my bones is a good them.\n",
      "the most faith is the sucks the most little all that you can't can't bed and strick in the becausun a stone.\n",
      "the most suck the devil is the best all in the most fishian difficult is the hight to a beheselve a man is a good commands to the most price of the price of the princip of the most price of a common and learn all in the best all in the best all intuitive is always remothon.\n",
      "\n",
      "\n",
      "----- diversity: 0.5\n",
      "----- Generating with seed: \"l.\n",
      "sticks and stones may break my bones \"\n",
      "l.\n",
      "sticks and stones may break my bones of a point of the bad is not the bather is shine is news work.\n",
      "the own lip.\n",
      "it is everything is worth the right.\n",
      "it is the stone.\n",
      "what make slive a good fairud.\n",
      "first liter.\n",
      "the port to the side is another man's indiess have to go is money to rise, the side of a suck know.\n",
      "anything is shall before a beaur lies and learn like a day.\n",
      "don't be open the prosper.\n",
      "the devil feet to don't wish.\n",
      "what to t\n",
      "\n",
      "----- diversity: 1.0\n",
      "----- Generating with seed: \"l.\n",
      "sticks and stones may break my bones \"\n",
      "l.\n",
      "sticks and stones may break my bones is hed ir pick is end cnies what you virtuation.\n",
      "the hingy as fair if a fishialls the fahuln' alothers at learnion like a man's devil, that does not don't because one steccy\n",
      "the prediciate\n",
      "plau't is me\n",
      "your adviess, and he who yought.\n",
      "where that life and childrens are still is a youthe theirs chech the storm.\n",
      "make it.\n",
      "lucker.\n",
      "don't with how ebersts teak in slack beforl.\n",
      "it is fore of all through b\n",
      "\n",
      "----- diversity: 1.2\n",
      "----- Generating with seed: \"l.\n",
      "sticks and stones may break my bones \"\n",
      "l.\n",
      "sticks and stones may break my bones and truef\n",
      "fives that greeit eek or disiarate a lotive befor what yen tritef.\n",
      "my ut shifine\n",
      "thousamess.\n",
      "never falits are will to ming step\n",
      "knock.\n",
      "don't pleasuriked is behe world)\n",
      "he bepould.\n",
      "estrrands apronsing is it is worthy to oldes not.\n",
      "he . you have got ethire\n",
      "the polit.\n",
      "\n",
      "iddom, in our grow unday put variety he had one say be start and love of our eled about is the real what downg, improve the\n",
      "\n",
      "--------------------------------------------------\n",
      "Iteration 3\n",
      "Epoch 1/5\n",
      "36634/36634 [==============================] - 30s 809us/step - loss: 1.0107 - acc: 0.6865\n",
      "Epoch 2/5\n",
      "36634/36634 [==============================] - 29s 803us/step - loss: 0.9988 - acc: 0.6872\n",
      "Epoch 3/5\n",
      "36634/36634 [==============================] - 29s 787us/step - loss: 0.9872 - acc: 0.6894\n",
      "Epoch 4/5\n",
      "36634/36634 [==============================] - 27s 736us/step - loss: 0.9785 - acc: 0.6962\n",
      "Epoch 5/5\n",
      "36634/36634 [==============================] - 26s 710us/step - loss: 0.9618 - acc: 0.7004\n",
      "\n",
      "----- diversity: 0.2\n",
      "----- Generating with seed: \"nly the good die young.\n",
      "only the lead do\"\n",
      "nly the good die young.\n",
      "only the lead down to spryne is and reason back is not the most traws to be a spich and learn a straw\n",
      "the black the saint.\n",
      "life.\n",
      "very woman man spend.\n",
      "and the most fishishe still is and learn all history fortunes to the reade the most fishishe still life, the devil you not know thiring the same spoies\n",
      "the price of the spich is the most fishishe strengthe the black the same closery than not the same spoies\n",
      "the sam\n",
      "\n",
      "----- diversity: 0.5\n",
      "----- Generating with seed: \"nly the good die young.\n",
      "only the lead do\"\n",
      "nly the good die young.\n",
      "only the lead down and success to a storm.\n",
      "the devil don't be sure.\n",
      "the world for the devil\n",
      "time for the money is words so arounse some life.\n",
      "visuce the more\n",
      "the art is always raild will come to the same people to a friend.\n",
      "the world procentiness laught make many a single spoils.\n",
      "what but words absolutell)\n",
      "whee the beginning to the spice of the price, better to have to can chope fears.\n",
      "the black marter.\n",
      "don't be \n",
      "\n",
      "----- diversity: 1.0\n",
      "----- Generating with seed: \"nly the good die young.\n",
      "only the lead do\"\n",
      "nly the good die young.\n",
      "only the lead does not keeps to success or bencledic.\n",
      "tage is never keep the ward or socialt. yould for in't better than keep no luriness? easy to reason to go of its cowd, rase man sacrinines\n",
      "anything boaty, you saved out thing got chay to one lips, not last with more.)\n",
      "anythinesment fat va inverns, but cridition does not can all imade no dead don't necessity how words in all of eur most closers.\n",
      "fratear for tow\n",
      "\n",
      "----- diversity: 1.2\n",
      "----- Generating with seed: \"nly the good die young.\n",
      "only the lead do\"\n",
      "nly the good die young.\n",
      "only the lead doeng' life a more religion first liuts dannousl.\n",
      "vilter of faith is bates, and watic hands.\n",
      "part is this ghing.\n",
      "know will sayh far lackes grown may teats. .\n",
      "iwart of your untilly.\n",
      "cir y's\n",
      "keepll\n",
      "be chopbly until our faure when a heap\n",
      "the end hoftent. (cchilself for which back. (hopts and live on their penny\n",
      "i.\n",
      "mere nothing.\n",
      "the bige to wealthy tomath.\n",
      "'ts\n",
      "enttule, drive. (stegwity.\n",
      "things alwage at\n"
     ]
    }
   ],
   "source": [
    "for iteration in range(1, 4):\n",
    "    print()\n",
    "    print('-' * 50)\n",
    "    print('Iteration', iteration)\n",
    "    model.fit(X, y,\n",
    "              batch_size=256,\n",
    "              epochs=5)\n",
    "\n",
    "    start_index = random.randrange(len(text) - maxlen - 1)\n",
    "    for diversity in [0.2, 0.5, 1.0, 1.2]:\n",
    "        print()\n",
    "        print('----- diversity:', diversity)\n",
    "\n",
    "        generated = ''\n",
    "        sentence = text[start_index: start_index + maxlen]\n",
    "        generated += sentence\n",
    "        print('----- Generating with seed: \"' + sentence + '\"')\n",
    "        sys.stdout.write(generated)\n",
    "\n",
    "        for i in range(400):\n",
    "            x = np.zeros((1, maxlen, len(chars)))\n",
    "            for t, char in enumerate(sentence):\n",
    "                x[0, t, char_indices[char]] = 1.\n",
    "\n",
    "            preds = model.predict(x, verbose=0)[0]\n",
    "            next_index = sample(preds, diversity)\n",
    "            next_char = indices_char[next_index]\n",
    "\n",
    "            generated += next_char\n",
    "            sentence = sentence[1:] + next_char\n",
    "\n",
    "            sys.stdout.write(next_char)\n",
    "            sys.stdout.flush()\n",
    "        print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[name: \"/device:CPU:0\"\n",
      "device_type: \"CPU\"\n",
      "memory_limit: 268435456\n",
      "locality {\n",
      "}\n",
      "incarnation: 12827226444526493741\n",
      "]\n"
     ]
    }
   ],
   "source": [
    "from tensorflow.python.client import device_lib\n",
    "print(device_lib.list_local_devices())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.save('sayings.h5')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "import h5py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'{\"keras_version\": \"2.1.2\", \"class_name\": \"Sequential\", \"backend\": \"tensorflow\", \"config\": [{\"class_name\": \"LSTM\", \"config\": {\"unit_forget_bias\": true, \"batch_input_shape\": [null, 40, 57], \"return_state\": false, \"recurrent_activation\": \"hard_sigmoid\", \"kernel_initializer\": {\"class_name\": \"VarianceScaling\", \"config\": {\"scale\": 1.0, \"mode\": \"fan_avg\", \"seed\": null, \"distribution\": \"uniform\"}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"implementation\": 1, \"dropout\": 0.0, \"units\": 128, \"recurrent_dropout\": 0.0, \"name\": \"lstm_1\", \"activation\": \"tanh\", \"go_backwards\": false, \"recurrent_constraint\": null, \"recurrent_initializer\": {\"class_name\": \"Orthogonal\", \"config\": {\"seed\": null, \"gain\": 1.0}}, \"trainable\": true, \"kernel_regularizer\": null, \"use_bias\": true, \"stateful\": false, \"dtype\": \"float32\", \"activity_regularizer\": null, \"bias_regularizer\": null, \"bias_constraint\": null, \"return_sequences\": false, \"recurrent_regularizer\": null, \"unroll\": false, \"kernel_constraint\": null}}, {\"class_name\": \"Dense\", \"config\": {\"activation\": \"linear\", \"trainable\": true, \"kernel_regularizer\": null, \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"VarianceScaling\", \"config\": {\"scale\": 1.0, \"mode\": \"fan_avg\", \"seed\": null, \"distribution\": \"uniform\"}}, \"activity_regularizer\": null, \"bias_regularizer\": null, \"bias_constraint\": null, \"units\": 57, \"name\": \"dense_1\", \"kernel_constraint\": null, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}}}, {\"class_name\": \"Activation\", \"config\": {\"name\": \"activation_1\", \"activation\": \"softmax\", \"trainable\": true}}]}'"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.to_json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('keras_js/chars.js', 'w') as fout:\n",
    "    fout.write('maxlen = ' + str(maxlen) + '\\n')\n",
    "    fout.write('num_chars = ' + str(len(chars)) + '\\n')\n",
    "    fout.write('char_indices = ' + json.dumps(char_indices, indent=2) + '\\n')\n",
    "    fout.write('indices_char = ' + json.dumps(indices_char, indent=2) + '\\n')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\\xa0'"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'\\xa0'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "'’' "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
