{
"cells": [
{
"cell_type": "markdown",
"source": [
"# Download and Run LLM's on Google Colab (GPU-accelerated)"
],
"metadata": {
"id": "Iz-w0gCPmW8_"
}
},
{
"cell_type": "markdown",
"source": [
"\n",
"## Step 1: Get access token from huggingface"
],
"metadata": {
"id": "3Rq389lywEiu"
}
},
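{
"cell_type": "markdown",
"source": [
"One convenient option is to keep the token in a small local file (a hypothetical `hf_token.txt`) so it never appears in a code cell; the `read_token` helper in Step 3 can then load it. A minimal sketch:"
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# Optional: store your Hugging Face token in a local file so no code cell\n",
"# ever contains the secret. Paste your real token over the placeholder.\n",
"with open(\"hf_token.txt\", \"w\") as f:\n",
"    f.write(\"hf_xxx\")  # placeholder; replace with your own token"
],
"metadata": {},
"execution_count": null,
"outputs": []
},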
{
"cell_type": "markdown",
"source": [
"## Step 2: Install packages"
],
"metadata": {
"id": "AhEDYalWkv7b"
}
},
{
"cell_type": "code",
"source": [
"!python -V #Python 3.10.12"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-VuenKPqoNfS",
"outputId": "684fc88a-f112-4e08-9d26-b3e38ba8331e"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Python 3.10.12\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"!nvcc --version # find the CUDA driver build above"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "nj8FZCXshH-Y",
"outputId": "94e32e2a-dca5-475a-8428-deba8c297797"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"nvcc: NVIDIA (R) Cuda compiler driver\n",
"Copyright (c) 2005-2023 NVIDIA Corporation\n",
"Built on Tue_Aug_15_22:02:13_PDT_2023\n",
"Cuda compilation tools, release 12.2, V12.2.140\n",
"Build cuda_12.2.r12.2/compiler.33191640_0\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Install key libraries for LLM\n",
"\n",
"#Install llama-cpp-python with CUBLAS, compatible to CUDA 12.2 which is the CUDA driver build above\n",
"!set LLAMA_CUBLAS=1\n",
"!set CMAKE_ARGS=-DLLAMA_CUBLAS=on\n",
"!set FORCE_CMAKE=1\n",
"\n",
"#Install llama-cpp-python, cuda-enabled package\n",
"!python -m pip install llama-cpp-python==0.2.7 --prefer-binary --extra-index-url=https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/AVX2/cu122\n",
"\n",
"#Install pytorch-related, cuda-enabled package\n",
"!pip install torch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cu121"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-BfhqNzQk06m",
"outputId": "25910e79-e004-46ba-cfc8-3e0566bf8650"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/AVX2/cu122\n",
"Collecting llama-cpp-python==0.2.7\n",
" Downloading https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/wheels/llama_cpp_python-0.2.7%2Bcu122-cp310-cp310-manylinux_2_31_x86_64.whl (14.8 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.8/14.8 MB\u001b[0m \u001b[31m26.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python==0.2.7) (4.12.1)\n",
"Requirement already satisfied: numpy>=1.20.0 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python==0.2.7) (1.25.2)\n",
"Collecting diskcache>=5.6.1 (from llama-cpp-python==0.2.7)\n",
" Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m1.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hInstalling collected packages: diskcache, llama-cpp-python\n",
"Successfully installed diskcache-5.6.3 llama-cpp-python-0.2.7+cu122\n",
"Looking in indexes: https://download.pytorch.org/whl/cu121\n",
"Requirement already satisfied: torch==2.3.0 in /usr/local/lib/python3.10/dist-packages (2.3.0+cu121)\n",
"Requirement already satisfied: torchvision==0.18.0 in /usr/local/lib/python3.10/dist-packages (0.18.0+cu121)\n",
"Requirement already satisfied: torchaudio==2.3.0 in /usr/local/lib/python3.10/dist-packages (2.3.0+cu121)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch==2.3.0) (3.14.0)\n",
"Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch==2.3.0) (4.12.1)\n",
"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch==2.3.0) (1.12.1)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch==2.3.0) (3.3)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch==2.3.0) (3.1.4)\n",
"Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch==2.3.0) (2023.6.0)\n",
"Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.3.0)\n",
" Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m23.7/23.7 MB\u001b[0m \u001b[31m25.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.3.0)\n",
" Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m823.6/823.6 kB\u001b[0m \u001b[31m34.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.3.0)\n",
" Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.1/14.1 MB\u001b[0m \u001b[31m61.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting nvidia-cudnn-cu12==8.9.2.26 (from torch==2.3.0)\n",
" Downloading https://download.pytorch.org/whl/cu121/nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m731.7/731.7 MB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting nvidia-cublas-cu12==12.1.3.1 (from torch==2.3.0)\n",
" Downloading https://download.pytorch.org/whl/cu121/nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.6/410.6 MB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting nvidia-cufft-cu12==11.0.2.54 (from torch==2.3.0)\n",
" Downloading https://download.pytorch.org/whl/cu121/nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.6/121.6 MB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting nvidia-curand-cu12==10.3.2.106 (from torch==2.3.0)\n",
" Downloading https://download.pytorch.org/whl/cu121/nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.5/56.5 MB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting nvidia-cusolver-cu12==11.4.5.107 (from torch==2.3.0)\n",
" Downloading https://download.pytorch.org/whl/cu121/nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.2/124.2 MB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting nvidia-cusparse-cu12==12.1.0.106 (from torch==2.3.0)\n",
" Downloading https://download.pytorch.org/whl/cu121/nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m196.0/196.0 MB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting nvidia-nccl-cu12==2.20.5 (from torch==2.3.0)\n",
" Downloading https://download.pytorch.org/whl/cu121/nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m176.2/176.2 MB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting nvidia-nvtx-cu12==12.1.105 (from torch==2.3.0)\n",
" Downloading https://download.pytorch.org/whl/cu121/nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m99.1/99.1 kB\u001b[0m \u001b[31m11.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: triton==2.3.0 in /usr/local/lib/python3.10/dist-packages (from torch==2.3.0) (2.3.0)\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from torchvision==0.18.0) (1.25.2)\n",
"Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision==0.18.0) (9.4.0)\n",
"Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch==2.3.0)\n",
" Downloading https://download.pytorch.org/whl/cu121/nvidia_nvjitlink_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (19.8 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.8/19.8 MB\u001b[0m \u001b[31m51.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch==2.3.0) (2.1.5)\n",
"Requirement already satisfied: mpmath<1.4.0,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch==2.3.0) (1.3.0)\n",
"Installing collected packages: nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12\n",
"Successfully installed nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.1.105 nvidia-nvtx-cu12-12.1.105\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"#Working for GPU\n",
"%%writefile gpu_requirements.txt\n",
"annotated-types==0.7.0\n",
"anyio==4.4.0\n",
"certifi==2022.12.7\n",
"charset-normalizer==2.1.1\n",
"click==8.1.7\n",
"colorama==0.4.6\n",
"diskcache==5.6.3\n",
"dnspython==2.6.1\n",
"email_validator==2.1.1\n",
"exceptiongroup==1.2.1\n",
"filelock==3.13.1\n",
"fsspec==2024.6.0\n",
"h11==0.14.0\n",
"httpcore==1.0.5\n",
"httptools==0.6.1\n",
"httpx==0.27.0\n",
"huggingface-hub==0.23.3\n",
"idna==3.4\n",
"Jinja2==3.1.4\n",
"llama_cpp_python==0.2.7+cu122\n",
"markdown-it-py==3.0.0\n",
"MarkupSafe==2.1.5\n",
"mdurl==0.1.2\n",
"mpmath==1.3.0\n",
"networkx==3.2.1\n",
"numpy==1.26.4\n",
"orjson==3.10.3\n",
"packaging==24.0\n",
"pillow==10.2.0\n",
"pydantic==2.7.3\n",
"pydantic_core==2.18.4\n",
"Pygments==2.18.0\n",
"python-dotenv==1.0.1\n",
"python-multipart==0.0.9\n",
"PyYAML==6.0.1\n",
"requests==2.28.1\n",
"rich==13.7.1\n",
"shellingham==1.5.4\n",
"sniffio==1.3.1\n",
"starlette==0.37.2\n",
"sympy==1.12\n",
"torch==2.3.0+cu121\n",
"torchaudio==2.3.0+cu121\n",
"torchvision==0.18.0+cu121\n",
"tqdm==4.66.4\n",
"typer==0.12.3\n",
"typing_extensions==4.12.1\n",
"ujson==5.10.0\n",
"watchfiles==0.22.0"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Du8AaWpgpUeu",
"outputId": "824a8c74-0f7f-4806-c94f-19f8effb3c9d"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Overwriting gpu_requirements.txt\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "hdVSk5iZ1DVB",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "94b5de4e-c6ad-4e97-cc38-bc8f8e7f1d79"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/86.8 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m155.3/155.3 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m176.9/176.9 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m401.7/401.7 kB\u001b[0m \u001b[31m13.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.5/61.5 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m16.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m18.2/18.2 MB\u001b[0m \u001b[31m36.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m142.5/142.5 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m59.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m51.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.8/62.8 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.7/5.7 MB\u001b[0m \u001b[31m56.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.2/47.2 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m78.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.8/143.8 kB\u001b[0m \u001b[31m23.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
"ipython 7.34.0 requires jedi>=0.16, which is not installed.\n",
"gcsfs 2023.6.0 requires fsspec==2023.6.0, but you have fsspec 2024.6.0 which is incompatible.\n",
"google-colab 1.0.0 requires requests==2.31.0, but you have requests 2.28.1 which is incompatible.\n",
"imageio 2.31.6 requires pillow<10.1.0,>=8.3.2, but you have pillow 10.2.0 which is incompatible.\n",
"jupyter-server 1.24.0 requires anyio<4,>=3.1.0, but you have anyio 4.4.0 which is incompatible.\n",
"kaggle 1.6.14 requires certifi>=2023.7.22, but you have certifi 2022.12.7 which is incompatible.\n",
"spacy 3.7.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n",
"weasel 0.3.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n",
"yfinance 0.2.40 requires requests>=2.31, but you have requests 2.28.1 which is incompatible.\u001b[0m\u001b[31m\n",
"\u001b[0m"
]
}
],
"source": [
"!pip install -r gpu_requirements.txt #it's normal to see incompatiblity errors; the most important packages have been installed correctly"
]
},
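{
"cell_type": "markdown",
"source": [
"As an optional sanity check (a minimal sketch, assuming the installs above succeeded), confirm that the CUDA-enabled builds import correctly:"
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# Optional sanity check: the CUDA-enabled builds should import cleanly\n",
"import torch\n",
"import llama_cpp\n",
"\n",
"print(\"torch:\", torch.__version__)  # expect a +cu121 build\n",
"print(\"CUDA available:\", torch.cuda.is_available())  # expect True on a GPU runtime\n",
"print(\"llama-cpp-python:\", llama_cpp.__version__)"
],
"metadata": {},
"execution_count": null,
"outputs": []
},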
{
"cell_type": "markdown",
"source": [
"## Step 3: Download almost any huggingface LLM\n",
"\n",
"Use the \"hf_hub_download\" function to download models on huggingface using the access token from Step 1. In this case, I want to download a small model of the \"llava-1.6-mistral-7b-gguf\" model from \"cjpais\"'s huggingface repository.\n"
],
"metadata": {
"id": "efJ_g2etSvLD"
}
},
{
"cell_type": "code",
"source": [
"import torch\n",
"import huggingface_hub"
],
"metadata": {
"id": "kpXQGhHlij6q"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import os\n",
"from huggingface_hub import hf_hub_download\n",
"\n",
"# Function to read the token from a file\n",
"def read_token(file_path):\n",
" try:\n",
" with open(file_path, 'r') as file:\n",
" return file.readline().strip()\n",
" except FileNotFoundError:\n",
" raise ValueError(f\"Token file not found: {file_path}\")\n",
"\n",
"# Define the model name and file\n",
"model_name = \"cjpais/llava-1.6-mistral-7b-gguf\"\n",
"model_file = \"llava-v1.6-mistral-7b.Q3_K_XS.gguf\"\n",
"\n",
"\n",
"# Download the model from Hugging Face Hub\n",
"model_path = hf_hub_download(\n",
" model_name,\n",
" filename=model_file,\n",
" local_dir='models/', # Download the model to the \"models\" folder\n",
" token=\"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\" #Replace this token from huggingface with your own token (Setting -> Access Toekns -> New token -> Generate Token)\n",
")\n",
"\n",
"print(\"My model path:\", model_path)"
],
"metadata": {
"id": "WsYot3Rz1NVf",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 170,
"referenced_widgets": [
"9b2c72c88ad449208a96410551efa5a7",
"7f142927bd0947d49acecfea26463ae6",
"50281819a51b4454a2f6ed4abda3b935",
"6439a70317e447029661c88b7c652d53",
"a89162677c0d4d95b3f5520bd036270c",
"b5dd981b8ed84d2e9ca599a9537eb387",
"f26936644c804771bf09975be21f4aa1",
"9cc2e5b6551b47e08308355200a60234",
"765b4ebd1d144f59b116a623b0428e14",
"461b9e9a6a254fe384a5003adc0dbc29",
"037bd58fa08d4baa87aaa2fd12df44c5"
]
},
"outputId": "0e0b711e-6e60-42b7-dd30-18c63f138860"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n",
"The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
"To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
"You will be able to reuse this secret in all of your notebooks.\n",
"Please note that authentication is recommended but still optional to access public models or datasets.\n",
" warnings.warn(\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"llava-v1.6-mistral-7b.Q3_K_XS.gguf: 0%| | 0.00/2.99G [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "9b2c72c88ad449208a96410551efa5a7"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"My model path: models/llava-v1.6-mistral-7b.Q3_K_XS.gguf\n"
]
}
]
},
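{
"cell_type": "markdown",
"source": [
"The same repository hosts several quantization levels. A minimal sketch to list the available `.gguf` files and pick a different one (this repo is public, so no token should be needed):"
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# Optional: list the GGUF files in the repo to choose another quantization\n",
"from huggingface_hub import list_repo_files\n",
"\n",
"for f in list_repo_files(\"cjpais/llava-1.6-mistral-7b-gguf\"):\n",
"    if f.endswith(\".gguf\"):\n",
"        print(f)"
],
"metadata": {},
"execution_count": null,
"outputs": []
},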
{
"cell_type": "code",
"source": [
"from llama_cpp import Llama\n",
"\n",
"# model_path is location of to the GGUF model that you've download from HuggingFace on Colab\n",
"model_path = \"/content/models/llava-v1.6-mistral-7b.Q3_K_XS.gguf\"\n",
"\n",
"#load the LLM\n",
"llm = Llama(model_path=model_path,\n",
" n_gpu_layers=-1) #load model while enabling GPU"
],
"metadata": {
"id": "w2Y9Vn_nbFDM"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"Note that BLAS = 1 means GPU is enabled:\n",
"* AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 |\n",
"\n"
],
"metadata": {
"id": "RFMXgvWzckbU"
}
},
{
"cell_type": "markdown",
"source": [
"## Step 4: Ask the LLM a question"
],
"metadata": {
"id": "U4GldNtm_s-P"
}
},
{
"cell_type": "code",
"source": [
"user_question = \"What is the earliest civilization in the world?\" # @param {type:\"string\"}"
],
"metadata": {
"id": "AD7MiXnRl1Kr"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import time\n",
"\n",
"# Prompt creation\n",
"system_message = \"You are a helpful assistant\"\n",
"user_message = \"Q: \"+ user_question+ \" A: \"\n",
"\n",
"# Start the timer\n",
"start_time = time.time()\n",
"\n",
"prompt = f\"\"\"[INST] <>\n",
"{system_message}\n",
"<>\n",
"{user_message} [/INST]\"\"\"\n",
"\n",
"# Run the model\n",
"output = llm(\n",
" prompt, # Prompt\n",
" max_tokens=2000, # Generate up to 2,000 tokens\n",
" stop=[\"Q:\", \"\\n\"], # Stop generating just before the model would generate a new question\n",
" #echo=True # Echo the prompt back in the output\n",
")\n",
"\n",
"# Stop the timer\n",
"end_time = time.time()\n",
"\n",
"# Get model response\n",
"print(output[\"choices\"][0][\"text\"])\n",
"\n",
"# Calculate runtime\n",
"runtime = end_time - start_time\n",
"print(\"response run time is: \", runtime)"
],
"metadata": {
"id": "kN73qG6-yg33"
},
"execution_count": null,
"outputs": []
}
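,
{
"cell_type": "markdown",
"source": [
"llama-cpp-python also exposes a higher-level chat API that applies the model's prompt template for you, so you don't need to hand-build the `[INST]` wrapper. A minimal sketch (parameter values are illustrative):"
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# Alternative: let llama-cpp-python format the prompt via its chat API\n",
"output = llm.create_chat_completion(\n",
"    messages=[\n",
"        {\"role\": \"system\", \"content\": system_message},\n",
"        {\"role\": \"user\", \"content\": user_question},\n",
"    ],\n",
"    max_tokens=2000,\n",
")\n",
"print(output[\"choices\"][0][\"message\"][\"content\"])"
],
"metadata": {},
"execution_count": null,
"outputs": []
}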
],
"metadata": {
"accelerator": "GPU",
"colab": {
"machine_shape": "hm",
"provenance": [],
"gpuClass": "premium",
"gpuType": "T4",
"toc_visible": true,
"include_colab_link": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"9b2c72c88ad449208a96410551efa5a7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_7f142927bd0947d49acecfea26463ae6",
"IPY_MODEL_50281819a51b4454a2f6ed4abda3b935",
"IPY_MODEL_6439a70317e447029661c88b7c652d53"
],
"layout": "IPY_MODEL_a89162677c0d4d95b3f5520bd036270c"
}
},
"7f142927bd0947d49acecfea26463ae6": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b5dd981b8ed84d2e9ca599a9537eb387",
"placeholder": "",
"style": "IPY_MODEL_f26936644c804771bf09975be21f4aa1",
"value": "llava-v1.6-mistral-7b.Q3_K_XS.gguf: 100%"
}
},
"50281819a51b4454a2f6ed4abda3b935": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_9cc2e5b6551b47e08308355200a60234",
"max": 2991290624,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_765b4ebd1d144f59b116a623b0428e14",
"value": 2991290624
}
},
"6439a70317e447029661c88b7c652d53": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_461b9e9a6a254fe384a5003adc0dbc29",
"placeholder": "",
"style": "IPY_MODEL_037bd58fa08d4baa87aaa2fd12df44c5",
"value": " 2.99G/2.99G [00:26<00:00, 129MB/s]"
}
},
"a89162677c0d4d95b3f5520bd036270c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b5dd981b8ed84d2e9ca599a9537eb387": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f26936644c804771bf09975be21f4aa1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"9cc2e5b6551b47e08308355200a60234": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"765b4ebd1d144f59b116a623b0428e14": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"461b9e9a6a254fe384a5003adc0dbc29": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"037bd58fa08d4baa87aaa2fd12df44c5": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}