{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "qjUYDME9kgWS",
    "outputId": "ee5a458d-cc2e-4ed4-bf32-077fecb227d6"
   },
   "source": [
    "# Albert TFlite"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install tf-transformers\n",
    "\n",
    "!pip install sentencepiece\n",
    "\n",
    "!pip install tensorflow-text\n",
    "\n",
    "!pip install transformers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "cy6shXrXlL_D",
    "outputId": "74fbc1e8-0861-4d1f-9741-f70b8bf7516a"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Tensorflow version 2.7.0\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Supper TF warnings\n",
    "\n",
    "import tensorflow as tf\n",
    "print(\"Tensorflow version\", tf.__version__)\n",
    "\n",
    "from tf_transformers.models import AlbertModel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "0HEJnnnFlPxR"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "P89IVu5JlREX"
   },
   "source": [
    "### Convert a Model to TFlite\n",
    "\n",
    "The most important thing to notice here is that, if we want to convert a model to ```tflite```, we have to ensure that ```inputs``` to the model are **deterministic**, which means inputs should not be dynamic. We have to fix  **batch_size**, **sequence_length** and other related input constraints depends on the model of interest.\n",
    "\n",
    "### Load Albert Model\n",
    "\n",
    "1. Fix the inputs\n",
    "2. We can always check the ```model``` **inputs** and **output** by using ```model.input``` and ```model.output```.\n",
    "3. We use ```batch_size=1``` and ```sequence_length=64```.)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "WV-Ygv4Mlnlo",
    "outputId": "06b4df6a-7304-423d-cb88-4d9ed0759c6e"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:absl:Successful ✅✅: Model checkpoints matched and loaded from /root/.cache/huggingface/hub/tftransformers--albert-base-v2.main.999c3eeace9b4d2c3f2ad87aad4548b3b73ea3cc/ckpt-1\n",
      "INFO:absl:Successful ✅: Loaded model from tftransformers/albert-base-v2\n"
     ]
    }
   ],
   "source": [
    "model_name = 'albert-base-v2'\n",
    "batch_size = 1\n",
    "sequence_length = 64\n",
    "model = AlbertModel.from_pretrained(model_name, batch_size=batch_size, sequence_length=sequence_length)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "edB8Qpo2mgZD"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "uL7UDdp9m9ty"
   },
   "source": [
    "## Verify Models inputs and outputs\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "SL8wv3mjnCaE",
    "outputId": "9fcce560-b612-43ac-dd93-5df464007aaf"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model inputs {'input_ids': <KerasTensor: shape=(1, 64) dtype=int32 (created by layer 'input_ids')>, 'input_mask': <KerasTensor: shape=(1, 64) dtype=int32 (created by layer 'input_mask')>, 'input_type_ids': <KerasTensor: shape=(1, 64) dtype=int32 (created by layer 'input_type_ids')>}\n",
      "Model outputs {'cls_output': <KerasTensor: shape=(1, 768) dtype=float32 (created by layer 'tf_transformers/albert')>, 'token_embeddings': <KerasTensor: shape=(1, 64, 768) dtype=float32 (created by layer 'tf_transformers/albert')>, 'token_logits': <KerasTensor: shape=(1, 64, 30000) dtype=float32 (created by layer 'tf_transformers/albert')>, 'last_token_logits': <KerasTensor: shape=(1, 30000) dtype=float32 (created by layer 'tf_transformers/albert')>}\n"
     ]
    }
   ],
   "source": [
    "print(\"Model inputs\", model.input)\n",
    "print(\"Model outputs\", model.output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "JN6QpCnznFNR"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "k97vFtrSnMGd"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "SGBPiXy8nMjQ"
   },
   "source": [
    "## Save Model as Serialized Version\n",
    "\n",
    "We have to save the model using ```model.save```. We use the ```SavedModel``` for converting it to ```tflite```."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "HeDAyaXznZiX",
    "outputId": "648e46e2-714d-4c48-aa7c-e22e9ebe4f4e"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING:absl:Found untraced functions such as word_embeddings_layer_call_fn, word_embeddings_layer_call_and_return_conditional_losses, type_embeddings_layer_call_fn, type_embeddings_layer_call_and_return_conditional_losses, positional_embeddings_layer_call_fn while saving (showing 5 of 125). These functions will not be directly callable after loading.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Assets written to: albert-base-v2/saved_model/assets\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Assets written to: albert-base-v2/saved_model/assets\n"
     ]
    }
   ],
   "source": [
    "model.save(\"{}/saved_model\".format(model_name), save_format='tf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "id": "DGIccdmJnj_5"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "-HOEyoodnvoU"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "XTLzBJAGnv2m"
   },
   "source": [
    "## Convert SavedModel to TFlite"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "SqC2HJ10nywO",
    "outputId": "6e80b65e-a2fb-478b-dcff-6b66a66e846d"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING:absl:Buffer deduplication procedure will be skipped when flatbuffer library is not properly loaded\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TFlite conversion succesful\n"
     ]
    }
   ],
   "source": [
    "converter = tf.lite.TFLiteConverter.from_saved_model(\"{}/saved_model\".format(model_name)) # path to the SavedModel directory\n",
    "converter.experimental_new_converter = True\n",
    "\n",
    "tflite_model = converter.convert()\n",
    "\n",
    "open(\"{}/saved_model.tflite\".format(model_name), \"wb\").write(tflite_model)\n",
    "print(\"TFlite conversion succesful\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "eBqTsjUToG7I"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "FdIFext9ta6E"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "bDnheGa_tmZq"
   },
   "source": [
    "## Load TFlite Model "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "id": "vHzESzPAtpHd"
   },
   "outputs": [],
   "source": [
    "# Load the TFLite model and allocate tensors.\n",
    "interpreter = tf.lite.Interpreter(model_path=\"{}/saved_model.tflite\".format(model_name))\n",
    "interpreter.allocate_tensors()\n",
    "\n",
    "# Get input and output tensors.\n",
    "input_details = interpreter.get_input_details()\n",
    "output_details = interpreter.get_output_details()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "id": "GTYdEgPatzVk"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "C5fUX6ZqxefF"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "u1I0ZJ-XxfDg"
   },
   "source": [
    "## Assert TFlite Model and Keras Model outputs\n",
    "\n",
    "After conversion we have to assert the model outputs using\n",
    "```tflite``` and ```Keras``` model, to ensure proper conversion.\n",
    "\n",
    "1. Create examples using ```tf.random.uniform```. \n",
    "2. Check outputs using both models.\n",
    "3. Note: We need slightly higher ```rtol``` here to assert."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "QnYr9D5Ot6t4",
    "outputId": "8f6c5f60-7c06-4b6d-aa8b-108b57d11fee"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Outputs asserted and succesful:  ✅\n"
     ]
    }
   ],
   "source": [
    "# Dummy Examples \n",
    "input_ids = tf.random.uniform(minval=0, maxval=100, shape=(batch_size, sequence_length), dtype=tf.int32)\n",
    "input_mask = tf.ones_like(input_ids)\n",
    "input_type_ids = tf.zeros_like(input_ids)\n",
    "\n",
    "\n",
    "# input type ids\n",
    "interpreter.set_tensor(\n",
    "    input_details[0]['index'],\n",
    "    input_type_ids,\n",
    ")\n",
    "# input_mask\n",
    "interpreter.set_tensor(input_details[1]['index'], input_mask)\n",
    "\n",
    "# input ids\n",
    "interpreter.set_tensor(\n",
    "    input_details[2]['index'],\n",
    "    input_ids,\n",
    ")\n",
    "\n",
    "# Invoke inputs\n",
    "interpreter.invoke()\n",
    "# Take last output\n",
    "tflite_output = interpreter.get_tensor(output_details[-1]['index'])\n",
    "\n",
    "# Keras Model outputs .\n",
    "model_inputs = {'input_ids': input_ids, 'input_mask': input_mask, 'input_type_ids': input_type_ids}\n",
    "model_outputs = model(model_inputs)\n",
    "\n",
    "# We need a slightly higher rtol here to assert :-)\n",
    "tf.debugging.assert_near(tflite_output, model_outputs['token_logits'], rtol=3.0)\n",
    "print(\"Outputs asserted and succesful:  ✅\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "q8n7FuUgw95j"
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "name": "albert_tflite.ipynb",
   "provenance": []
  },
  "jupytext": {
   "formats": "ipynb,md:myst"
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}