diff --git a/Setup_LLM_on_Google_Colab_GPU_Accelerated.ipynb b/Setup_LLM_on_Google_Colab_GPU_Accelerated.ipynb
deleted file mode 100644
index de80a28..0000000
--- a/Setup_LLM_on_Google_Colab_GPU_Accelerated.ipynb
+++ /dev/null
@@ -1,893 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "view-in-github",
- "colab_type": "text"
- },
- "source": [
- "
"
- ]
- },
- {
- "cell_type": "markdown",
- "source": [
- "# Download and Run LLM's on Google Colab (GPU-accelerated)"
- ],
- "metadata": {
- "id": "Iz-w0gCPmW8_"
- }
- },
- {
- "cell_type": "markdown",
- "source": [
- "\n",
- "## Step 1: Get access token from huggingface"
- ],
- "metadata": {
- "id": "3Rq389lywEiu"
- }
- },
- {
- "cell_type": "markdown",
- "source": [
- "## Step 2: Install packages"
- ],
- "metadata": {
- "id": "AhEDYalWkv7b"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "!python -V #Python 3.10.12"
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "-VuenKPqoNfS",
- "outputId": "684fc88a-f112-4e08-9d26-b3e38ba8331e"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Python 3.10.12\n"
- ]
- }
- ]
- },
- {
- "cell_type": "code",
- "source": [
- "!nvcc --version # find the CUDA driver build above"
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "nj8FZCXshH-Y",
- "outputId": "94e32e2a-dca5-475a-8428-deba8c297797"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "nvcc: NVIDIA (R) Cuda compiler driver\n",
- "Copyright (c) 2005-2023 NVIDIA Corporation\n",
- "Built on Tue_Aug_15_22:02:13_PDT_2023\n",
- "Cuda compilation tools, release 12.2, V12.2.140\n",
- "Build cuda_12.2.r12.2/compiler.33191640_0\n"
- ]
- }
- ]
- },
- {
- "cell_type": "code",
- "source": [
- "# Install key libraries for LLM\n",
- "\n",
- "#Install llama-cpp-python with CUBLAS, compatible to CUDA 12.2 which is the CUDA driver build above\n",
- "!set LLAMA_CUBLAS=1\n",
- "!set CMAKE_ARGS=-DLLAMA_CUBLAS=on\n",
- "!set FORCE_CMAKE=1\n",
- "\n",
- "#Install llama-cpp-python, cuda-enabled package\n",
- "!python -m pip install llama-cpp-python==0.2.7 --prefer-binary --extra-index-url=https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/AVX2/cu122\n",
- "\n",
- "#Install pytorch-related, cuda-enabled package\n",
- "!pip install torch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cu121"
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "-BfhqNzQk06m",
- "outputId": "25910e79-e004-46ba-cfc8-3e0566bf8650"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Looking in indexes: https://pypi.org/simple, https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/AVX2/cu122\n",
- "Collecting llama-cpp-python==0.2.7\n",
- " Downloading https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/wheels/llama_cpp_python-0.2.7%2Bcu122-cp310-cp310-manylinux_2_31_x86_64.whl (14.8 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.8/14.8 MB\u001b[0m \u001b[31m26.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hRequirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python==0.2.7) (4.12.1)\n",
- "Requirement already satisfied: numpy>=1.20.0 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python==0.2.7) (1.25.2)\n",
- "Collecting diskcache>=5.6.1 (from llama-cpp-python==0.2.7)\n",
- " Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m1.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hInstalling collected packages: diskcache, llama-cpp-python\n",
- "Successfully installed diskcache-5.6.3 llama-cpp-python-0.2.7+cu122\n",
- "Looking in indexes: https://download.pytorch.org/whl/cu121\n",
- "Requirement already satisfied: torch==2.3.0 in /usr/local/lib/python3.10/dist-packages (2.3.0+cu121)\n",
- "Requirement already satisfied: torchvision==0.18.0 in /usr/local/lib/python3.10/dist-packages (0.18.0+cu121)\n",
- "Requirement already satisfied: torchaudio==2.3.0 in /usr/local/lib/python3.10/dist-packages (2.3.0+cu121)\n",
- "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch==2.3.0) (3.14.0)\n",
- "Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch==2.3.0) (4.12.1)\n",
- "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch==2.3.0) (1.12.1)\n",
- "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch==2.3.0) (3.3)\n",
- "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch==2.3.0) (3.1.4)\n",
- "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch==2.3.0) (2023.6.0)\n",
- "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.3.0)\n",
- " Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m23.7/23.7 MB\u001b[0m \u001b[31m25.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.3.0)\n",
- " Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m823.6/823.6 kB\u001b[0m \u001b[31m34.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.3.0)\n",
- " Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.1/14.1 MB\u001b[0m \u001b[31m61.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hCollecting nvidia-cudnn-cu12==8.9.2.26 (from torch==2.3.0)\n",
- " Downloading https://download.pytorch.org/whl/cu121/nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m731.7/731.7 MB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hCollecting nvidia-cublas-cu12==12.1.3.1 (from torch==2.3.0)\n",
- " Downloading https://download.pytorch.org/whl/cu121/nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.6/410.6 MB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hCollecting nvidia-cufft-cu12==11.0.2.54 (from torch==2.3.0)\n",
- " Downloading https://download.pytorch.org/whl/cu121/nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.6/121.6 MB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hCollecting nvidia-curand-cu12==10.3.2.106 (from torch==2.3.0)\n",
- " Downloading https://download.pytorch.org/whl/cu121/nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.5/56.5 MB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hCollecting nvidia-cusolver-cu12==11.4.5.107 (from torch==2.3.0)\n",
- " Downloading https://download.pytorch.org/whl/cu121/nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.2/124.2 MB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hCollecting nvidia-cusparse-cu12==12.1.0.106 (from torch==2.3.0)\n",
- " Downloading https://download.pytorch.org/whl/cu121/nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m196.0/196.0 MB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hCollecting nvidia-nccl-cu12==2.20.5 (from torch==2.3.0)\n",
- " Downloading https://download.pytorch.org/whl/cu121/nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m176.2/176.2 MB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hCollecting nvidia-nvtx-cu12==12.1.105 (from torch==2.3.0)\n",
- " Downloading https://download.pytorch.org/whl/cu121/nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m99.1/99.1 kB\u001b[0m \u001b[31m11.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hRequirement already satisfied: triton==2.3.0 in /usr/local/lib/python3.10/dist-packages (from torch==2.3.0) (2.3.0)\n",
- "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from torchvision==0.18.0) (1.25.2)\n",
- "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision==0.18.0) (9.4.0)\n",
- "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch==2.3.0)\n",
- " Downloading https://download.pytorch.org/whl/cu121/nvidia_nvjitlink_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (19.8 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.8/19.8 MB\u001b[0m \u001b[31m51.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hRequirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch==2.3.0) (2.1.5)\n",
- "Requirement already satisfied: mpmath<1.4.0,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch==2.3.0) (1.3.0)\n",
- "Installing collected packages: nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12\n",
- "Successfully installed nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.1.105 nvidia-nvtx-cu12-12.1.105\n"
- ]
- }
- ]
- },
- {
- "cell_type": "code",
- "source": [
- "#Working for GPU\n",
- "%%writefile gpu_requirements.txt\n",
- "annotated-types==0.7.0\n",
- "anyio==4.4.0\n",
- "certifi==2022.12.7\n",
- "charset-normalizer==2.1.1\n",
- "click==8.1.7\n",
- "colorama==0.4.6\n",
- "diskcache==5.6.3\n",
- "dnspython==2.6.1\n",
- "email_validator==2.1.1\n",
- "exceptiongroup==1.2.1\n",
- "filelock==3.13.1\n",
- "fsspec==2024.6.0\n",
- "h11==0.14.0\n",
- "httpcore==1.0.5\n",
- "httptools==0.6.1\n",
- "httpx==0.27.0\n",
- "huggingface-hub==0.23.3\n",
- "idna==3.4\n",
- "Jinja2==3.1.4\n",
- "llama_cpp_python==0.2.7+cu122\n",
- "markdown-it-py==3.0.0\n",
- "MarkupSafe==2.1.5\n",
- "mdurl==0.1.2\n",
- "mpmath==1.3.0\n",
- "networkx==3.2.1\n",
- "numpy==1.26.4\n",
- "orjson==3.10.3\n",
- "packaging==24.0\n",
- "pillow==10.2.0\n",
- "pydantic==2.7.3\n",
- "pydantic_core==2.18.4\n",
- "Pygments==2.18.0\n",
- "python-dotenv==1.0.1\n",
- "python-multipart==0.0.9\n",
- "PyYAML==6.0.1\n",
- "requests==2.28.1\n",
- "rich==13.7.1\n",
- "shellingham==1.5.4\n",
- "sniffio==1.3.1\n",
- "starlette==0.37.2\n",
- "sympy==1.12\n",
- "torch==2.3.0+cu121\n",
- "torchaudio==2.3.0+cu121\n",
- "torchvision==0.18.0+cu121\n",
- "tqdm==4.66.4\n",
- "typer==0.12.3\n",
- "typing_extensions==4.12.1\n",
- "ujson==5.10.0\n",
- "watchfiles==0.22.0"
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "Du8AaWpgpUeu",
- "outputId": "9ecac9d5-b0bc-4284-ad68-b556d93bb7a8"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Writing gpu_requirements.txt\n"
- ]
- }
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "hdVSk5iZ1DVB",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "94b5de4e-c6ad-4e97-cc38-bc8f8e7f1d79"
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/86.8 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m155.3/155.3 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m176.9/176.9 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m401.7/401.7 kB\u001b[0m \u001b[31m13.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.5/61.5 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m16.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m18.2/18.2 MB\u001b[0m \u001b[31m36.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m142.5/142.5 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m59.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m51.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.8/62.8 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.7/5.7 MB\u001b[0m \u001b[31m56.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.2/47.2 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m78.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.8/143.8 kB\u001b[0m \u001b[31m23.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
- "ipython 7.34.0 requires jedi>=0.16, which is not installed.\n",
- "gcsfs 2023.6.0 requires fsspec==2023.6.0, but you have fsspec 2024.6.0 which is incompatible.\n",
- "google-colab 1.0.0 requires requests==2.31.0, but you have requests 2.28.1 which is incompatible.\n",
- "imageio 2.31.6 requires pillow<10.1.0,>=8.3.2, but you have pillow 10.2.0 which is incompatible.\n",
- "jupyter-server 1.24.0 requires anyio<4,>=3.1.0, but you have anyio 4.4.0 which is incompatible.\n",
- "kaggle 1.6.14 requires certifi>=2023.7.22, but you have certifi 2022.12.7 which is incompatible.\n",
- "spacy 3.7.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n",
- "weasel 0.3.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n",
- "yfinance 0.2.40 requires requests>=2.31, but you have requests 2.28.1 which is incompatible.\u001b[0m\u001b[31m\n",
- "\u001b[0m"
- ]
- }
- ],
- "source": [
- "!pip install -r gpu_requirements.txt #it's normal to see incompatiblity errors; the most important packages have been installed correctly"
- ]
- },
- {
- "cell_type": "markdown",
- "source": [
- "## Step 3: Download almost any huggingface LLM\n",
- "\n",
- "Use the \"hf_hub_download\" function to download models on huggingface using the access token from Step 1. In this case, I am loading the \"llava-1.6-mistral-7b-gguf\" model in \"cjpais\"'s huggingface repository.\n",
- "\n"
- ],
- "metadata": {
- "id": "efJ_g2etSvLD"
- }
- },
- {
- "cell_type": "markdown",
- "source": [
- ""
- ],
- "metadata": {
- "id": "Rpfqeh-wxzvV"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "import torch\n",
- "import huggingface_hub"
- ],
- "metadata": {
- "id": "kpXQGhHlij6q"
- },
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "source": [
- "import os\n",
- "from huggingface_hub import hf_hub_download\n",
- "\n",
- "# Function to read the token from a file\n",
- "def read_token(file_path):\n",
- " try:\n",
- " with open(file_path, 'r') as file:\n",
- " return file.readline().strip()\n",
- " except FileNotFoundError:\n",
- " raise ValueError(f\"Token file not found: {file_path}\")\n",
- "\n",
- "# Define the model name and file\n",
- "model_name = \"cjpais/llava-1.6-mistral-7b-gguf\"\n",
- "model_file = \"llava-v1.6-mistral-7b.Q3_K_XS.gguf\"\n",
- "\n",
- "\n",
- "# Download the model from Hugging Face Hub\n",
- "model_path = hf_hub_download(\n",
- " model_name,\n",
- " filename=model_file,\n",
- " local_dir='models/', # Download the model to the \"models\" folder\n",
- " token=\"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\" #Get this token from huggingface\n",
- ")\n",
- "\n",
- "print(\"My model path:\", model_path)"
- ],
- "metadata": {
- "id": "WsYot3Rz1NVf",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 170,
- "referenced_widgets": [
- "9b2c72c88ad449208a96410551efa5a7",
- "7f142927bd0947d49acecfea26463ae6",
- "50281819a51b4454a2f6ed4abda3b935",
- "6439a70317e447029661c88b7c652d53",
- "a89162677c0d4d95b3f5520bd036270c",
- "b5dd981b8ed84d2e9ca599a9537eb387",
- "f26936644c804771bf09975be21f4aa1",
- "9cc2e5b6551b47e08308355200a60234",
- "765b4ebd1d144f59b116a623b0428e14",
- "461b9e9a6a254fe384a5003adc0dbc29",
- "037bd58fa08d4baa87aaa2fd12df44c5"
- ]
- },
- "outputId": "0e0b711e-6e60-42b7-dd30-18c63f138860"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n",
- "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
- "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
- "You will be able to reuse this secret in all of your notebooks.\n",
- "Please note that authentication is recommended but still optional to access public models or datasets.\n",
- " warnings.warn(\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "llava-v1.6-mistral-7b.Q3_K_XS.gguf: 0%| | 0.00/2.99G [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "9b2c72c88ad449208a96410551efa5a7"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "My model path: models/llava-v1.6-mistral-7b.Q3_K_XS.gguf\n"
- ]
- }
- ]
- },
- {
- "cell_type": "code",
- "source": [
- "from llama_cpp import Llama\n",
- "\n",
- "# model_path is location of to the GGUF model that you've download from HuggingFace on Colab\n",
- "model_path = \"/content/models/llava-v1.6-mistral-7b.Q3_K_XS.gguf\"\n",
- "\n",
- "#load the LLM\n",
- "llm = Llama(model_path=model_path,\n",
- " n_gpu_layers=-1) #load model while enabling GPU"
- ],
- "metadata": {
- "id": "w2Y9Vn_nbFDM"
- },
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "markdown",
- "source": [
- "Note that BLAS = 1 means GPU is enabled:\n",
- "* AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 |\n",
- "\n"
- ],
- "metadata": {
- "id": "RFMXgvWzckbU"
- }
- },
- {
- "cell_type": "markdown",
- "source": [
- "## Step 4: Ask the LLM a question"
- ],
- "metadata": {
- "id": "U4GldNtm_s-P"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "user_question = \"What is the earliest civilization in the world?\" # @param {type:\"string\"}"
- ],
- "metadata": {
- "id": "AD7MiXnRl1Kr"
- },
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "source": [
- "import time\n",
- "\n",
- "# Prompt creation\n",
- "system_message = \"You are a helpful assistant\"\n",
- "user_message = \"Q: \"+ user_question+ \" A: \"\n",
- "\n",
- "# Start the timer\n",
- "start_time = time.time()\n",
- "\n",
- "prompt = f\"\"\"[INST] <>\n",
- "{system_message}\n",
- "<>\n",
- "{user_message} [/INST]\"\"\"\n",
- "\n",
- "# Run the model\n",
- "output = llm(\n",
- " prompt, # Prompt\n",
- " max_tokens=2000, # Generate up to 2,000 tokens\n",
- " stop=[\"Q:\", \"\\n\"], # Stop generating just before the model would generate a new question\n",
- " #echo=True # Echo the prompt back in the output\n",
- ")\n",
- "\n",
- "# Stop the timer\n",
- "end_time = time.time()\n",
- "\n",
- "# Get model response\n",
- "print(output[\"choices\"][0][\"text\"])\n",
- "\n",
- "# Calculate runtime\n",
- "runtime = end_time - start_time\n",
- "print(\"response run time is: \", runtime)"
- ],
- "metadata": {
- "id": "kN73qG6-yg33"
- },
- "execution_count": null,
- "outputs": []
- }
- ],
- "metadata": {
- "accelerator": "GPU",
- "colab": {
- "machine_shape": "hm",
- "provenance": [],
- "gpuClass": "premium",
- "gpuType": "T4",
- "toc_visible": true,
- "include_colab_link": true
- },
- "kernelspec": {
- "display_name": "Python 3",
- "name": "python3"
- },
- "language_info": {
- "name": "python"
- },
- "widgets": {
- "application/vnd.jupyter.widget-state+json": {
- "9b2c72c88ad449208a96410551efa5a7": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_7f142927bd0947d49acecfea26463ae6",
- "IPY_MODEL_50281819a51b4454a2f6ed4abda3b935",
- "IPY_MODEL_6439a70317e447029661c88b7c652d53"
- ],
- "layout": "IPY_MODEL_a89162677c0d4d95b3f5520bd036270c"
- }
- },
- "7f142927bd0947d49acecfea26463ae6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_b5dd981b8ed84d2e9ca599a9537eb387",
- "placeholder": "",
- "style": "IPY_MODEL_f26936644c804771bf09975be21f4aa1",
- "value": "llava-v1.6-mistral-7b.Q3_K_XS.gguf: 100%"
- }
- },
- "50281819a51b4454a2f6ed4abda3b935": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_9cc2e5b6551b47e08308355200a60234",
- "max": 2991290624,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_765b4ebd1d144f59b116a623b0428e14",
- "value": 2991290624
- }
- },
- "6439a70317e447029661c88b7c652d53": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_461b9e9a6a254fe384a5003adc0dbc29",
- "placeholder": "",
- "style": "IPY_MODEL_037bd58fa08d4baa87aaa2fd12df44c5",
- "value": " 2.99G/2.99G [00:26<00:00, 129MB/s]"
- }
- },
- "a89162677c0d4d95b3f5520bd036270c": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "b5dd981b8ed84d2e9ca599a9537eb387": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "f26936644c804771bf09975be21f4aa1": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "9cc2e5b6551b47e08308355200a60234": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "765b4ebd1d144f59b116a623b0428e14": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "461b9e9a6a254fe384a5003adc0dbc29": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "037bd58fa08d4baa87aaa2fd12df44c5": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- }
- }
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
\ No newline at end of file