chraac · chraac · Jun 22, 2025 · May 28, 2025 · Jun 4, 2025 · Jun 4, 2025
diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml
@@ -0,0 +1,45 @@
+# This workflow installs the Python dependencies, lints the repository with flake8
+# and runs the unit tests of the helper scripts using a single version of Python.
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: Python unittests
+
+on:
+  push:
+    branches:
+    - main
+  pull_request:
+    branches:
+    - main
+
+permissions:
+  contents: read
+
+jobs:
+  python-unittest-scripts:
+    runs-on: ubuntu-latest
+    steps:
+    # The v3 releases of these actions target the deprecated Node 16 runtime; use current majors.
+    - uses: actions/checkout@v4
+    - name: Set up Python 3.11
+      uses: actions/setup-python@v5
+      with:
+        python-version: "3.11"
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install flake8 pytest
+        pip install -r "${GITHUB_WORKSPACE}/scripts/requirements.txt"
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    - name: Test with pytest
+      run: |
+        # The test module is executed directly with `python3 -m`, not through the pytest runner.
+        # Prepend any existing PYTHONPATH only when it is non-empty, so no empty entry is added.
+        export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${GITHUB_WORKSPACE}:${GITHUB_WORKSPACE}/scripts:${GITHUB_WORKSPACE}/scripts/tests"
+        cd scripts/tests
+        python3 -m test_log_parser -v
diff --git a/.gitignore b/.gitignore
@@ -6,3 +6,4 @@ build_qnn_*
 temp/*
 *.txt
 run_logs
+*.pyc
diff --git a/docs/how-to-build.md b/docs/how-to-build.md
@@ -17,7 +17,7 @@ This guide describes the steps to build Android/Windows releases of the QNN back
 
 1. Navigate to the project root directory and run the build script:
    ```bash
-   ./docker/docker_compose_compile_and_share.sh
+   ./docker/docker_compose_compile.sh
    ```
 
 2. The console output will look similar to this, and executables will be located in `build_qnn_arm64-v8a/bin/`:
@@ -44,22 +44,22 @@ This guide describes the steps to build Android/Windows releases of the QNN back
 
 ```bash
 # Basic build (default: Release mode, QNN + Hexagon backends)
-./docker/docker_compose_compile_and_share.sh
+./docker/docker_compose_compile.sh
 
 # Debug build with Hexagon NPU backend
-./docker/docker_compose_compile_and_share.sh -d --enable-hexagon-backend
+./docker/docker_compose_compile.sh -d --enable-hexagon-backend
 
 # Debug build with Hexagon NPU backend only
-./docker/docker_compose_compile_and_share.sh -d --hexagon-npu-only
+./docker/docker_compose_compile.sh -d --hexagon-npu-only
 
 # Debug build with Hexagon NPU backend and quantized tensor support
-./docker/docker_compose_compile_and_share.sh -d --hexagon-npu-only --enable-dequant
+./docker/docker_compose_compile.sh -d --hexagon-npu-only --enable-dequant
 
 # QNN-only build with performance logging
-./docker/docker_compose_compile_and_share.sh --qnn-only --perf-log
+./docker/docker_compose_compile.sh --qnn-only --perf-log
 
 # Force rebuild with debug symbols and build timing
-./docker/docker_compose_compile_and_share.sh -r -d --print-build-time
+./docker/docker_compose_compile.sh -r -d --print-build-time
 ```
 
 ## Windows
@@ -80,6 +80,15 @@ This guide describes the steps to build Android/Windows releases of the QNN back
 
         ![VS2022 CMake Installation](https://github.com/user-attachments/assets/9a36dde5-0e41-4421-9161-e9b09cd32eb1)
 
+3. **Install Hexagon SDK (for Hexagon NPU backend)**
+   - To compile the `hexagon-npu` backend, you need to install the latest Hexagon SDK
+   - Follow the [official documentation](https://docs.qualcomm.com/bundle/publicresource/topics/80-77512-1/hexagon-dsp-sdk-getting-started.html?product=1601111740010422):
+     1. First install the Qualcomm Package Manager (QPM)
+     2. Then use QPM to install the Hexagon SDK
+   - Set the environment variable `HEXAGON_SDK_ROOT` to point to the Hexagon SDK installation directory
+
+   > **Note**: The Hexagon SDK is only required if you plan to build with the `--enable-hexagon-backend` or `--hexagon-npu-only` flag.
+
 ### Build Steps
 
 1. **Open the Project**
@@ -124,4 +133,4 @@ This guide describes the steps to build Android/Windows releases of the QNN back
 After successful compilation, you'll find the following executables:
 - `llama-cli.exe` - Main inference executable
 - `llama-bench.exe` - Benchmarking tool
-- `test-backend-ops.exe` - Backend operation tests
+- `test-backend-ops.exe` - Backend operation tests 
diff --git a/llama.cpp b/llama.cpp
diff --git a/scripts/__init__.py b/scripts/__init__.py
diff --git a/scripts/batch_run_benchmarks_and_save_log.ps1 b/scripts/batch_run_benchmarks_and_save_log.ps1
@@ -9,7 +9,10 @@ param (
     [switch]$Verbose,
 
     [Alias('-s')]
-    [switch]$Skip8b
+    [switch]$Skip8b,
+
+    [Alias('-f')]
+    [switch]$FlashAttention
 )
 
 $_scriptPath = Split-Path -Parent $MyInvocation.MyCommand.Path
@@ -37,6 +40,10 @@ if ($Verbose) {
     $extraArgs = "-v"
 }
 
+if ($FlashAttention) {
+    $extraArgs += " --flash-attn 1"
+}
+
 $logFilePath = "$_scriptPath/../run_logs/$LogFileName"
 
 # Create logs directory if it doesn't exist

diff --git a/scripts/batch_run_benchmarks_and_save_log.sh b/scripts/batch_run_benchmarks_and_save_log.sh
@@ -8,6 +8,7 @@ _model_list=('meta-llama_Meta-Llama-3.2-1B-Instruct' 'meta-llama_Meta-Llama-3.2-
 _should_push_to_device=0
 _verbose_log=0
 _skip_8b_model=0
+_flash_attn=0
 
 # parse arguments to get the log file name
 while [[ $# -gt 0 ]]; do
@@ -30,6 +31,10 @@ while [[ $# -gt 0 ]]; do
         _skip_8b_model=1
         shift
         ;;
+    -f | --flash-attn)
+        _flash_attn=1
+        shift
+        ;;
     *)
         echo "Invalid option $1"
         exit 1
@@ -45,9 +50,13 @@ if [ $_skip_8b_model -eq 1 ]; then
     _model_list=('meta-llama_Meta-Llama-3.2-1B-Instruct' 'meta-llama_Meta-Llama-3.2-3B-Instruct')
 fi
 
-extra_args=""
+_extra_args=""
 if [ $_verbose_log -eq 1 ]; then
-    extra_args="-v"
+    _extra_args="-v"
+fi
+
+if [ $_flash_attn -eq 1 ]; then
+    _extra_args="${_extra_args} --flash-attn 1"
 fi
 
 log_file_path="$_script_path/../run_logs/$_log_file_name"
@@ -57,7 +66,7 @@ function run_benchmark() {
     local model_name=$1
     local command_string="cd $_device_path && "
     command_string+="LLAMA_CACHE=$_device_path/.cache LD_LIBRARY_PATH=./ ADSP_LIBRARY_PATH=./ "
-    command_string+="./llama-bench --progress ${extra_args} -mmp 0 -p 512 -n 128 -m ${_device_model_path}/$model_name"
+    command_string+="./llama-bench --progress ${_extra_args} -mmp 0 -p 512 -n 128 -m ${_device_model_path}/$model_name"
     adb shell $command_string
 }
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,3 +6,4 @@ build_qnn_* @@
     temp/*
     *.txt
     run_logs
+    *.pyc