diff --git a/setup_llm_on_google_colab_gpu_accelerated.ipynb b/setup_llm_on_google_colab_gpu_accelerated.ipynb
new file mode 100644
index 0000000..8e5815c
--- /dev/null
+++ b/setup_llm_on_google_colab_gpu_accelerated.ipynb
@@ -0,0 +1,892 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Download and Run LLM's on Google Colab (GPU-accelerated)"
+ ],
+ "metadata": {
+ "id": "Iz-w0gCPmW8_"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "## Step 1: Get access token from huggingface"
+ ],
+ "metadata": {
+ "id": "3Rq389lywEiu"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Step 2: Install packages"
+ ],
+ "metadata": {
+ "id": "AhEDYalWkv7b"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!python -V #Python 3.10.12"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "-VuenKPqoNfS",
+ "outputId": "684fc88a-f112-4e08-9d26-b3e38ba8331e"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Python 3.10.12\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!nvcc --version # find the CUDA driver build above"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "nj8FZCXshH-Y",
+ "outputId": "94e32e2a-dca5-475a-8428-deba8c297797"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "nvcc: NVIDIA (R) Cuda compiler driver\n",
+ "Copyright (c) 2005-2023 NVIDIA Corporation\n",
+ "Built on Tue_Aug_15_22:02:13_PDT_2023\n",
+ "Cuda compilation tools, release 12.2, V12.2.140\n",
+ "Build cuda_12.2.r12.2/compiler.33191640_0\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Install key libraries for LLM\n",
+ "\n",
+ "#Install llama-cpp-python with CUBLAS, compatible to CUDA 12.2 which is the CUDA driver build above\n",
+ "!set LLAMA_CUBLAS=1\n",
+ "!set CMAKE_ARGS=-DLLAMA_CUBLAS=on\n",
+ "!set FORCE_CMAKE=1\n",
+ "\n",
+ "#Install llama-cpp-python, cuda-enabled package\n",
+ "!python -m pip install llama-cpp-python==0.2.7 --prefer-binary --extra-index-url=https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/AVX2/cu122\n",
+ "\n",
+ "#Install pytorch-related, cuda-enabled package\n",
+ "!pip install torch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cu121"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "-BfhqNzQk06m",
+ "outputId": "25910e79-e004-46ba-cfc8-3e0566bf8650"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Looking in indexes: https://pypi.org/simple, https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/AVX2/cu122\n",
+ "Collecting llama-cpp-python==0.2.7\n",
+ " Downloading https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/wheels/llama_cpp_python-0.2.7%2Bcu122-cp310-cp310-manylinux_2_31_x86_64.whl (14.8 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.8/14.8 MB\u001b[0m \u001b[31m26.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python==0.2.7) (4.12.1)\n",
+ "Requirement already satisfied: numpy>=1.20.0 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python==0.2.7) (1.25.2)\n",
+ "Collecting diskcache>=5.6.1 (from llama-cpp-python==0.2.7)\n",
+ " Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m1.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hInstalling collected packages: diskcache, llama-cpp-python\n",
+ "Successfully installed diskcache-5.6.3 llama-cpp-python-0.2.7+cu122\n",
+ "Looking in indexes: https://download.pytorch.org/whl/cu121\n",
+ "Requirement already satisfied: torch==2.3.0 in /usr/local/lib/python3.10/dist-packages (2.3.0+cu121)\n",
+ "Requirement already satisfied: torchvision==0.18.0 in /usr/local/lib/python3.10/dist-packages (0.18.0+cu121)\n",
+ "Requirement already satisfied: torchaudio==2.3.0 in /usr/local/lib/python3.10/dist-packages (2.3.0+cu121)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch==2.3.0) (3.14.0)\n",
+ "Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch==2.3.0) (4.12.1)\n",
+ "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch==2.3.0) (1.12.1)\n",
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch==2.3.0) (3.3)\n",
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch==2.3.0) (3.1.4)\n",
+ "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch==2.3.0) (2023.6.0)\n",
+ "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.3.0)\n",
+ " Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m23.7/23.7 MB\u001b[0m \u001b[31m25.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.3.0)\n",
+ " Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m823.6/823.6 kB\u001b[0m \u001b[31m34.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.3.0)\n",
+ " Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.1/14.1 MB\u001b[0m \u001b[31m61.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting nvidia-cudnn-cu12==8.9.2.26 (from torch==2.3.0)\n",
+ " Downloading https://download.pytorch.org/whl/cu121/nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m731.7/731.7 MB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting nvidia-cublas-cu12==12.1.3.1 (from torch==2.3.0)\n",
+ " Downloading https://download.pytorch.org/whl/cu121/nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.6/410.6 MB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting nvidia-cufft-cu12==11.0.2.54 (from torch==2.3.0)\n",
+ " Downloading https://download.pytorch.org/whl/cu121/nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.6/121.6 MB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting nvidia-curand-cu12==10.3.2.106 (from torch==2.3.0)\n",
+ " Downloading https://download.pytorch.org/whl/cu121/nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.5/56.5 MB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting nvidia-cusolver-cu12==11.4.5.107 (from torch==2.3.0)\n",
+ " Downloading https://download.pytorch.org/whl/cu121/nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.2/124.2 MB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting nvidia-cusparse-cu12==12.1.0.106 (from torch==2.3.0)\n",
+ " Downloading https://download.pytorch.org/whl/cu121/nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m196.0/196.0 MB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting nvidia-nccl-cu12==2.20.5 (from torch==2.3.0)\n",
+ " Downloading https://download.pytorch.org/whl/cu121/nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m176.2/176.2 MB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting nvidia-nvtx-cu12==12.1.105 (from torch==2.3.0)\n",
+ " Downloading https://download.pytorch.org/whl/cu121/nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m99.1/99.1 kB\u001b[0m \u001b[31m11.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: triton==2.3.0 in /usr/local/lib/python3.10/dist-packages (from torch==2.3.0) (2.3.0)\n",
+ "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from torchvision==0.18.0) (1.25.2)\n",
+ "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision==0.18.0) (9.4.0)\n",
+ "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch==2.3.0)\n",
+ " Downloading https://download.pytorch.org/whl/cu121/nvidia_nvjitlink_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (19.8 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.8/19.8 MB\u001b[0m \u001b[31m51.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch==2.3.0) (2.1.5)\n",
+ "Requirement already satisfied: mpmath<1.4.0,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch==2.3.0) (1.3.0)\n",
+ "Installing collected packages: nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12\n",
+ "Successfully installed nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.1.105 nvidia-nvtx-cu12-12.1.105\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#Working for GPU\n",
+ "%%writefile gpu_requirements.txt\n",
+ "annotated-types==0.7.0\n",
+ "anyio==4.4.0\n",
+ "certifi==2022.12.7\n",
+ "charset-normalizer==2.1.1\n",
+ "click==8.1.7\n",
+ "colorama==0.4.6\n",
+ "diskcache==5.6.3\n",
+ "dnspython==2.6.1\n",
+ "email_validator==2.1.1\n",
+ "exceptiongroup==1.2.1\n",
+ "filelock==3.13.1\n",
+ "fsspec==2024.6.0\n",
+ "h11==0.14.0\n",
+ "httpcore==1.0.5\n",
+ "httptools==0.6.1\n",
+ "httpx==0.27.0\n",
+ "huggingface-hub==0.23.3\n",
+ "idna==3.4\n",
+ "Jinja2==3.1.4\n",
+ "llama_cpp_python==0.2.7+cu122\n",
+ "markdown-it-py==3.0.0\n",
+ "MarkupSafe==2.1.5\n",
+ "mdurl==0.1.2\n",
+ "mpmath==1.3.0\n",
+ "networkx==3.2.1\n",
+ "numpy==1.26.4\n",
+ "orjson==3.10.3\n",
+ "packaging==24.0\n",
+ "pillow==10.2.0\n",
+ "pydantic==2.7.3\n",
+ "pydantic_core==2.18.4\n",
+ "Pygments==2.18.0\n",
+ "python-dotenv==1.0.1\n",
+ "python-multipart==0.0.9\n",
+ "PyYAML==6.0.1\n",
+ "requests==2.28.1\n",
+ "rich==13.7.1\n",
+ "shellingham==1.5.4\n",
+ "sniffio==1.3.1\n",
+ "starlette==0.37.2\n",
+ "sympy==1.12\n",
+ "torch==2.3.0+cu121\n",
+ "torchaudio==2.3.0+cu121\n",
+ "torchvision==0.18.0+cu121\n",
+ "tqdm==4.66.4\n",
+ "typer==0.12.3\n",
+ "typing_extensions==4.12.1\n",
+ "ujson==5.10.0\n",
+ "watchfiles==0.22.0"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Du8AaWpgpUeu",
+ "outputId": "824a8c74-0f7f-4806-c94f-19f8effb3c9d"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Overwriting gpu_requirements.txt\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "hdVSk5iZ1DVB",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "94b5de4e-c6ad-4e97-cc38-bc8f8e7f1d79"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/86.8 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m155.3/155.3 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m176.9/176.9 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m401.7/401.7 kB\u001b[0m \u001b[31m13.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.5/61.5 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m16.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m18.2/18.2 MB\u001b[0m \u001b[31m36.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m142.5/142.5 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m59.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m51.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.8/62.8 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.7/5.7 MB\u001b[0m \u001b[31m56.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.2/47.2 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m78.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.8/143.8 kB\u001b[0m \u001b[31m23.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "ipython 7.34.0 requires jedi>=0.16, which is not installed.\n",
+ "gcsfs 2023.6.0 requires fsspec==2023.6.0, but you have fsspec 2024.6.0 which is incompatible.\n",
+ "google-colab 1.0.0 requires requests==2.31.0, but you have requests 2.28.1 which is incompatible.\n",
+ "imageio 2.31.6 requires pillow<10.1.0,>=8.3.2, but you have pillow 10.2.0 which is incompatible.\n",
+ "jupyter-server 1.24.0 requires anyio<4,>=3.1.0, but you have anyio 4.4.0 which is incompatible.\n",
+ "kaggle 1.6.14 requires certifi>=2023.7.22, but you have certifi 2022.12.7 which is incompatible.\n",
+ "spacy 3.7.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n",
+ "weasel 0.3.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n",
+ "yfinance 0.2.40 requires requests>=2.31, but you have requests 2.28.1 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0m"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install -r gpu_requirements.txt #it's normal to see incompatiblity errors; the most important packages have been installed correctly"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Step 3: Download almost any huggingface LLM\n",
+ "\n",
+ "Use the \"hf_hub_download\" function to download models on huggingface using the access token from Step 1. In this case, I want to download a small model of the \"llava-1.6-mistral-7b-gguf\" model from \"cjpais\"'s huggingface repository.\n"
+ ],
+ "metadata": {
+ "id": "efJ_g2etSvLD"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ ""
+ ],
+ "metadata": {
+ "id": "Rpfqeh-wxzvV"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import torch\n",
+ "import huggingface_hub"
+ ],
+ "metadata": {
+ "id": "kpXQGhHlij6q"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import os\n",
+ "from huggingface_hub import hf_hub_download\n",
+ "\n",
+ "# Function to read the token from a file\n",
+ "def read_token(file_path):\n",
+ " try:\n",
+ " with open(file_path, 'r') as file:\n",
+ " return file.readline().strip()\n",
+ " except FileNotFoundError:\n",
+ " raise ValueError(f\"Token file not found: {file_path}\")\n",
+ "\n",
+ "# Define the model name and file\n",
+ "model_name = \"cjpais/llava-1.6-mistral-7b-gguf\"\n",
+ "model_file = \"llava-v1.6-mistral-7b.Q3_K_XS.gguf\"\n",
+ "\n",
+ "\n",
+ "# Download the model from Hugging Face Hub\n",
+ "model_path = hf_hub_download(\n",
+ " model_name,\n",
+ " filename=model_file,\n",
+ " local_dir='models/', # Download the model to the \"models\" folder\n",
+ " token=\"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\" #Replace this token from huggingface with your own token (Setting -> Access Toekns -> New token -> Generate Token)\n",
+ ")\n",
+ "\n",
+ "print(\"My model path:\", model_path)"
+ ],
+ "metadata": {
+ "id": "WsYot3Rz1NVf",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 170,
+ "referenced_widgets": [
+ "9b2c72c88ad449208a96410551efa5a7",
+ "7f142927bd0947d49acecfea26463ae6",
+ "50281819a51b4454a2f6ed4abda3b935",
+ "6439a70317e447029661c88b7c652d53",
+ "a89162677c0d4d95b3f5520bd036270c",
+ "b5dd981b8ed84d2e9ca599a9537eb387",
+ "f26936644c804771bf09975be21f4aa1",
+ "9cc2e5b6551b47e08308355200a60234",
+ "765b4ebd1d144f59b116a623b0428e14",
+ "461b9e9a6a254fe384a5003adc0dbc29",
+ "037bd58fa08d4baa87aaa2fd12df44c5"
+ ]
+ },
+ "outputId": "0e0b711e-6e60-42b7-dd30-18c63f138860"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n",
+ "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+ "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+ "You will be able to reuse this secret in all of your notebooks.\n",
+ "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "llava-v1.6-mistral-7b.Q3_K_XS.gguf: 0%| | 0.00/2.99G [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "9b2c72c88ad449208a96410551efa5a7"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "My model path: models/llava-v1.6-mistral-7b.Q3_K_XS.gguf\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from llama_cpp import Llama\n",
+ "\n",
+ "# model_path is location of to the GGUF model that you've download from HuggingFace on Colab\n",
+ "model_path = \"/content/models/llava-v1.6-mistral-7b.Q3_K_XS.gguf\"\n",
+ "\n",
+ "#load the LLM\n",
+ "llm = Llama(model_path=model_path,\n",
+ " n_gpu_layers=-1) #load model while enabling GPU"
+ ],
+ "metadata": {
+ "id": "w2Y9Vn_nbFDM"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Note that BLAS = 1 means GPU is enabled:\n",
+ "* AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 |\n",
+ "\n"
+ ],
+ "metadata": {
+ "id": "RFMXgvWzckbU"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Step 4: Ask the LLM a question"
+ ],
+ "metadata": {
+ "id": "U4GldNtm_s-P"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "user_question = \"What is the earliest civilization in the world?\" # @param {type:\"string\"}"
+ ],
+ "metadata": {
+ "id": "AD7MiXnRl1Kr"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import time\n",
+ "\n",
+ "# Prompt creation\n",
+ "system_message = \"You are a helpful assistant\"\n",
+ "user_message = \"Q: \"+ user_question+ \" A: \"\n",
+ "\n",
+ "# Start the timer\n",
+ "start_time = time.time()\n",
+ "\n",
+ "prompt = f\"\"\"[INST] <>\n",
+ "{system_message}\n",
+ "<>\n",
+ "{user_message} [/INST]\"\"\"\n",
+ "\n",
+ "# Run the model\n",
+ "output = llm(\n",
+ " prompt, # Prompt\n",
+ " max_tokens=2000, # Generate up to 2,000 tokens\n",
+ " stop=[\"Q:\", \"\\n\"], # Stop generating just before the model would generate a new question\n",
+ " #echo=True # Echo the prompt back in the output\n",
+ ")\n",
+ "\n",
+ "# Stop the timer\n",
+ "end_time = time.time()\n",
+ "\n",
+ "# Get model response\n",
+ "print(output[\"choices\"][0][\"text\"])\n",
+ "\n",
+ "# Calculate runtime\n",
+ "runtime = end_time - start_time\n",
+ "print(\"response run time is: \", runtime)"
+ ],
+ "metadata": {
+ "id": "kN73qG6-yg33"
+ },
+ "execution_count": null,
+ "outputs": []
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "machine_shape": "hm",
+ "provenance": [],
+ "gpuClass": "premium",
+ "gpuType": "T4",
+ "toc_visible": true,
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "9b2c72c88ad449208a96410551efa5a7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_7f142927bd0947d49acecfea26463ae6",
+ "IPY_MODEL_50281819a51b4454a2f6ed4abda3b935",
+ "IPY_MODEL_6439a70317e447029661c88b7c652d53"
+ ],
+ "layout": "IPY_MODEL_a89162677c0d4d95b3f5520bd036270c"
+ }
+ },
+ "7f142927bd0947d49acecfea26463ae6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b5dd981b8ed84d2e9ca599a9537eb387",
+ "placeholder": "",
+ "style": "IPY_MODEL_f26936644c804771bf09975be21f4aa1",
+ "value": "llava-v1.6-mistral-7b.Q3_K_XS.gguf: 100%"
+ }
+ },
+ "50281819a51b4454a2f6ed4abda3b935": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_9cc2e5b6551b47e08308355200a60234",
+ "max": 2991290624,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_765b4ebd1d144f59b116a623b0428e14",
+ "value": 2991290624
+ }
+ },
+ "6439a70317e447029661c88b7c652d53": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_461b9e9a6a254fe384a5003adc0dbc29",
+ "placeholder": "",
+ "style": "IPY_MODEL_037bd58fa08d4baa87aaa2fd12df44c5",
+ "value": " 2.99G/2.99G [00:26<00:00, 129MB/s]"
+ }
+ },
+ "a89162677c0d4d95b3f5520bd036270c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b5dd981b8ed84d2e9ca599a9537eb387": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f26936644c804771bf09975be21f4aa1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "9cc2e5b6551b47e08308355200a60234": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "765b4ebd1d144f59b116a623b0428e14": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "461b9e9a6a254fe384a5003adc0dbc29": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "037bd58fa08d4baa87aaa2fd12df44c5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file