Compare commits
133 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 48fd8579c5 | |||
| 62bf372e1d | |||
| 3a4654141f | |||
| 967dcfcb37 | |||
| d7faf186f8 | |||
| f819aa68d5 | |||
| 975ad20d5a | |||
| a94106ae61 | |||
| 777e830aec | |||
| bbf2dc903a | |||
| 8fb05858c8 | |||
| d0d0f30d64 | |||
| 04185f9a31 | |||
| 5227a1370c | |||
| c33f306944 | |||
| 3306780507 | |||
| 84d7b60cd2 | |||
| 0d435a5d4a | |||
| 4994543d56 | |||
| 026ceab2ef | |||
| c92bf504fb | |||
| a7eac42c9d | |||
| f20550d5f6 | |||
| 742b08c86d | |||
| a85ddcd6c1 | |||
| 758ac4ab62 | |||
| 56e5d7ff54 | |||
| 596a8ba21d | |||
| 19b67b3c8a | |||
| eaa97ce301 | |||
| a05d16a789 | |||
| f11aa77e3a | |||
| 1bbcf60e58 | |||
| c90ee58bc0 | |||
| 2983e20695 | |||
| 4e7570cb26 | |||
| 7987b61345 | |||
| 6fbd47ed8a | |||
| bada4ef524 | |||
| 8918d37bb6 | |||
| b0d94a3594 | |||
| 1291204d2b | |||
| 804f201589 | |||
| 65779b1eb6 | |||
| 7eac978950 | |||
| b1d0cfa2e7 | |||
| 282b1c2b0a | |||
| b4b0d4cd7e | |||
| 6a66ee75ac | |||
| 45dcdcccbb | |||
| 5d47f1dfca | |||
| 1e07e5f17b | |||
| 8adb1b9730 | |||
| 9d3720b6a1 | |||
| 6496601e4e | |||
| b96df3b501 | |||
| 489460f509 | |||
| 192870d658 | |||
| bed4b5ee49 | |||
| f0eb402100 | |||
| e73486904b | |||
| 1119a45e93 | |||
| 85860cdddd | |||
| da2e89bc94 | |||
| 01ed2da10e | |||
| 5fea839e34 | |||
| d725910e7a | |||
| 99f73ad745 | |||
| b599a990f6 | |||
| c742393efc | |||
| c403300cd2 | |||
| c3a5d6201e | |||
| 9e7350c3bb | |||
| 5bee471884 | |||
| 77eb8c7b78 | |||
| acf2302755 | |||
| 8fedd3defe | |||
| adda8181a6 | |||
| 789494cc89 | |||
| 561086e940 | |||
| 20c1140676 | |||
| 9bf99c0fdd | |||
| 899eb61dcf | |||
| a5e6e112a5 | |||
| 834795d6d9 | |||
| 8442cbca77 | |||
| bcca760403 | |||
| 102a58a068 | |||
| 4e849d5309 | |||
| 44e92c8bf0 | |||
| 873f5ae51e | |||
| 3e0b907138 | |||
| 7ee6b6a96b | |||
| 14adb673f6 | |||
| 91381f0850 | |||
| d3e91b5d06 | |||
| 74fcd7d569 | |||
| 0843cd8363 | |||
| 8c3d022506 | |||
| a5a55f3c7d | |||
| 2fb9e74a13 | |||
| f950882ffd | |||
| 18b3b572f0 | |||
| 023e3ff59b | |||
| 7318c81fe0 | |||
| f516a1cf4c | |||
| 43959fc758 | |||
| ff7b1e10a4 | |||
| b049712cd6 | |||
| 2cdfc60fe1 | |||
| 2739cb0894 | |||
| 3e5ef4fa08 | |||
| a7a010d660 | |||
| 67000f5ff1 | |||
| efa111e2dd | |||
| 813798ec2b | |||
| c8b1cd0fab | |||
| 9220dfb7a3 | |||
| f6e9497f1e | |||
| 45e7713638 | |||
| 802dc491f8 | |||
| 3ca5a36240 | |||
| 3046299414 | |||
| 46edd4e3e4 | |||
| 800a18ac60 | |||
| 37c60cb82a | |||
| 9446112081 | |||
| 07384e4d7c | |||
| 1f7bf74970 | |||
| 4b72490486 | |||
| 9e4e0bc24a | |||
| 18687666a6 | |||
| 09ff203662 |
@@ -67,9 +67,9 @@ def fetch_all_records(url: str, *, expand: str | None = None, per_page: int = 50
|
||||
return items
|
||||
|
||||
|
||||
def normalize_os_variants(install_methods_json: list[dict[str, Any]]) -> list[str]:
|
||||
def normalize_os_variants(install_methods: list[dict[str, Any]]) -> list[str]:
|
||||
os_values: list[str] = []
|
||||
for item in install_methods_json:
|
||||
for item in install_methods:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
resources = item.get("resources", {})
|
||||
@@ -83,6 +83,31 @@ def normalize_os_variants(install_methods_json: list[dict[str, Any]]) -> list[st
|
||||
return os_values
|
||||
|
||||
|
||||
def split_notes(notes_raw: list[dict[str, Any]]) -> tuple[list[str], list[str]]:
    """Split PocketBase notes into (info_notes, warnings).

    Each entry has shape ``{"text": str, "type": "warning"|...}``. Anything
    flagged ``type == "warning"`` (compared case-insensitively, ignoring
    surrounding whitespace) lands in the warnings list so the bash menu can
    render those in red with a dedicated WARNINGS header. Other notes go to
    the regular notes list.

    Args:
        notes_raw: Raw note entries. ``None`` or an empty list yields two
            empty lists. Entries that are not dicts, or whose ``text`` is
            missing, not a string, or blank, are skipped.

    Returns:
        A ``(info, warnings)`` tuple of stripped note texts, each list
        keeping the input order.
    """
    info: list[str] = []
    warns: list[str] = []
    for note in notes_raw or []:
        if not isinstance(note, dict):
            continue
        text = note.get("text")
        if not isinstance(text, str) or not text.strip():
            continue
        # Guard the type field the same way as the text field: a malformed
        # non-string value must not crash the run; it is simply treated as
        # a regular (non-warning) note.
        raw_type = note.get("type")
        ntype = raw_type.strip().lower() if isinstance(raw_type, str) else ""
        bucket = warns if ntype == "warning" else info
        bucket.append(text.strip())
    return info, warns
|
||||
|
||||
|
||||
def build_script_path(type_name: str, slug: str) -> str:
|
||||
type_name = (type_name or "").strip().lower()
|
||||
slug = (slug or "").strip()
|
||||
@@ -138,19 +163,19 @@ def main() -> int:
|
||||
full_script_url = f"{SCRIPT_BASE}/{script_path}"
|
||||
script_url_mirror = to_mirror_url(full_script_url)
|
||||
|
||||
install_methods_json = raw.get("install_methods_json", [])
|
||||
if not isinstance(install_methods_json, list):
|
||||
install_methods_json = []
|
||||
# Sprint 11.7: PocketBase exposes these as `install_methods` and
|
||||
# `notes`, not `install_methods_json` / `notes_json`. The legacy field
|
||||
# names silently returned [] for every entry, which is why the cache
|
||||
# had empty notes and missing OS variants for every script.
|
||||
install_methods = raw.get("install_methods", [])
|
||||
if not isinstance(install_methods, list):
|
||||
install_methods = []
|
||||
|
||||
notes_json = raw.get("notes_json", [])
|
||||
if not isinstance(notes_json, list):
|
||||
notes_json = []
|
||||
notes_raw = raw.get("notes", [])
|
||||
if not isinstance(notes_raw, list):
|
||||
notes_raw = []
|
||||
|
||||
notes = [
|
||||
note.get("text", "")
|
||||
for note in notes_json
|
||||
if isinstance(note, dict) and isinstance(note.get("text"), str) and note.get("text", "").strip()
|
||||
]
|
||||
notes, warnings = split_notes(notes_raw)
|
||||
|
||||
category_ids = raw.get("categories", [])
|
||||
if not isinstance(category_ids, list):
|
||||
@@ -193,6 +218,7 @@ def main() -> int:
|
||||
"categories": category_ids,
|
||||
"category_names": category_names,
|
||||
"notes": notes,
|
||||
"warnings": warnings,
|
||||
"port": raw.get("port", 0),
|
||||
"website": raw.get("website", ""),
|
||||
"documentation": raw.get("documentation", ""),
|
||||
@@ -210,7 +236,7 @@ def main() -> int:
|
||||
# Emit one entry per install method so the menu shell can offer an
|
||||
# explicit OS choice. When there is only one method (or none), a
|
||||
# single entry is emitted with os="" (script decides at runtime).
|
||||
os_variants = normalize_os_variants(install_methods_json)
|
||||
os_variants = normalize_os_variants(install_methods)
|
||||
|
||||
if len(os_variants) > 1:
|
||||
for os_name in os_variants:
|
||||
@@ -228,11 +254,12 @@ def main() -> int:
|
||||
with OUTPUT_FILE.open("w", encoding="utf-8") as f:
|
||||
json.dump(cache, f, ensure_ascii=False, indent=2)
|
||||
|
||||
total_notes = sum(len(e.get("notes", [])) for e in cache)
|
||||
total_warns = sum(len(e.get("warnings", [])) for e in cache)
|
||||
print(f"\n✅ helpers_cache.json → {OUTPUT_FILE}")
|
||||
print(f" Guardados: {len(cache)}")
|
||||
print(f" Guardados: {len(cache)} entries, {total_notes} notes, {total_warns} warnings")
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
||||
@@ -0,0 +1,81 @@
|
||||
name: Build ProxMenux Monitor AppImage
|
||||
|
||||
on:
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '20'
|
||||
|
||||
- name: Install dependencies
|
||||
working-directory: AppImage
|
||||
run: npm install --legacy-peer-deps
|
||||
|
||||
- name: Build Next.js app
|
||||
working-directory: AppImage
|
||||
run: npm run build
|
||||
|
||||
- name: Install Python dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y python3 python3-pip python3-venv
|
||||
|
||||
- name: Make build script executable
|
||||
working-directory: AppImage
|
||||
run: chmod +x scripts/build_appimage.sh
|
||||
|
||||
- name: Build AppImage
|
||||
working-directory: AppImage
|
||||
run: ./scripts/build_appimage.sh
|
||||
|
||||
- name: Get version from package.json
|
||||
id: version
|
||||
working-directory: AppImage
|
||||
run: echo "VERSION=$(node -p "require('./package.json').version")" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Upload AppImage artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ProxMenux-${{ steps.version.outputs.VERSION }}-AppImage
|
||||
path: AppImage/dist/*.AppImage
|
||||
retention-days: 30
|
||||
|
||||
- name: Generate SHA256 checksum
|
||||
run: |
|
||||
cd AppImage/dist
|
||||
sha256sum *.AppImage > ProxMenux-Monitor.AppImage.sha256
|
||||
echo "Generated SHA256:"
|
||||
cat ProxMenux-Monitor.AppImage.sha256
|
||||
|
||||
- name: Upload AppImage and checksum to /AppImage folder in main
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
git config --global user.name "github-actions[bot]"
|
||||
git config --global user.email "github-actions[bot]@users.noreply.github.com"
|
||||
|
||||
git fetch origin main
|
||||
git checkout main
|
||||
|
||||
rm -f AppImage/*.AppImage AppImage/*.sha256 || true
|
||||
|
||||
# Copy new files
|
||||
cp AppImage/dist/*.AppImage AppImage/
|
||||
cp AppImage/dist/ProxMenux-Monitor.AppImage.sha256 AppImage/
|
||||
|
||||
git add AppImage/*.AppImage AppImage/*.sha256
|
||||
git commit -m "Update AppImage build ($(date +'%Y-%m-%d %H:%M:%S'))" || echo "No changes to commit"
|
||||
git push origin main
|
||||
Binary file not shown.
@@ -1 +1 @@
|
||||
3b28537fe679b87f166bd5b01de05c71d1b4303f765c87f3a23b216d433120c2 ProxMenux-1.2.0.AppImage
|
||||
db5bc199adba9c231f344428ac902a0cbf7473778e8a79a4535263599d975449 ProxMenux-1.2.0.AppImage
|
||||
|
||||
@@ -2,12 +2,20 @@
|
||||
|
||||
import { cn } from "@/lib/utils"
|
||||
|
||||
interface SriovInfo {
|
||||
role: "vf" | "pf-active" | "pf-idle"
|
||||
physfn?: string // VF only: parent PF BDF
|
||||
vfCount?: number // PF only: active VF count
|
||||
totalvfs?: number // PF only: maximum VFs
|
||||
}
|
||||
|
||||
interface GpuSwitchModeIndicatorProps {
|
||||
mode: "lxc" | "vm" | "unknown"
|
||||
mode: "lxc" | "vm" | "sriov" | "unknown"
|
||||
isEditing?: boolean
|
||||
pendingMode?: "lxc" | "vm" | null
|
||||
onToggle?: (e: React.MouseEvent) => void
|
||||
className?: string
|
||||
sriovInfo?: SriovInfo
|
||||
}
|
||||
|
||||
export function GpuSwitchModeIndicator({
|
||||
@@ -16,20 +24,38 @@ export function GpuSwitchModeIndicator({
|
||||
pendingMode = null,
|
||||
onToggle,
|
||||
className,
|
||||
sriovInfo,
|
||||
}: GpuSwitchModeIndicatorProps) {
|
||||
const displayMode = pendingMode ?? mode
|
||||
// SR-IOV is a non-editable hardware state. Pending toggles don't apply here.
|
||||
const displayMode = mode === "sriov" ? "sriov" : (pendingMode ?? mode)
|
||||
const isLxcActive = displayMode === "lxc"
|
||||
const isVmActive = displayMode === "vm"
|
||||
const hasChanged = pendingMode !== null && pendingMode !== mode
|
||||
const isSriovActive = displayMode === "sriov"
|
||||
const hasChanged =
|
||||
mode !== "sriov" && pendingMode !== null && pendingMode !== mode
|
||||
|
||||
// Colors
|
||||
const activeColor = isLxcActive ? "#3b82f6" : isVmActive ? "#a855f7" : "#6b7280"
|
||||
const sriovColor = "#14b8a6" // teal-500
|
||||
const activeColor = isSriovActive
|
||||
? sriovColor
|
||||
: isLxcActive
|
||||
? "#3b82f6"
|
||||
: isVmActive
|
||||
? "#a855f7"
|
||||
: "#6b7280"
|
||||
const inactiveColor = "#374151" // gray-700 for dark theme
|
||||
const dimmedColor = "#4b5563" // gray-600 for dashed SR-IOV branches
|
||||
const lxcColor = isLxcActive ? "#3b82f6" : inactiveColor
|
||||
const vmColor = isVmActive ? "#a855f7" : inactiveColor
|
||||
|
||||
const handleClick = (e: React.MouseEvent) => {
|
||||
// Only stop propagation and handle toggle when in editing mode
|
||||
// SR-IOV state can't be toggled — swallow the click so it doesn't reach
|
||||
// the card (which would open the detail modal unexpectedly from this
|
||||
// area). For lxc/vm, preserve the original behavior.
|
||||
if (isSriovActive) {
|
||||
e.stopPropagation()
|
||||
return
|
||||
}
|
||||
if (isEditing) {
|
||||
e.stopPropagation()
|
||||
if (onToggle) {
|
||||
@@ -39,11 +65,20 @@ export function GpuSwitchModeIndicator({
|
||||
// When not editing, let the click propagate to the card to open the modal
|
||||
}
|
||||
|
||||
// Build the VF count label shown in the SR-IOV badge. For PFs we know
|
||||
// exactly how many VFs are active; for a VF we show its parent PF.
|
||||
const sriovBadgeText = (() => {
|
||||
if (!isSriovActive) return ""
|
||||
if (sriovInfo?.role === "vf") return "SR-IOV VF"
|
||||
if (sriovInfo?.vfCount && sriovInfo.vfCount > 0) return `SR-IOV ×${sriovInfo.vfCount}`
|
||||
return "SR-IOV"
|
||||
})()
|
||||
|
||||
return (
|
||||
<div
|
||||
<div
|
||||
className={cn(
|
||||
"flex items-center gap-6",
|
||||
isEditing && "cursor-pointer",
|
||||
isEditing && !isSriovActive && "cursor-pointer",
|
||||
className
|
||||
)}
|
||||
onClick={handleClick}
|
||||
@@ -77,10 +112,10 @@ export function GpuSwitchModeIndicator({
|
||||
<line x1="26" y1="44" x2="26" y2="50" stroke={activeColor} strokeWidth="2.5" strokeLinecap="round" className="transition-all duration-300" />
|
||||
<line x1="38" y1="44" x2="38" y2="50" stroke={activeColor} strokeWidth="2.5" strokeLinecap="round" className="transition-all duration-300" />
|
||||
{/* GPU text */}
|
||||
<text
|
||||
x="26"
|
||||
y="32"
|
||||
textAnchor="middle"
|
||||
<text
|
||||
x="26"
|
||||
y="32"
|
||||
textAnchor="middle"
|
||||
fill={activeColor}
|
||||
className="text-[14px] font-bold transition-all duration-300"
|
||||
style={{ fontFamily: 'system-ui, sans-serif' }}
|
||||
@@ -106,8 +141,8 @@ export function GpuSwitchModeIndicator({
|
||||
cx="95"
|
||||
cy="50"
|
||||
r="14"
|
||||
fill={isEditing ? "#f59e0b20" : `${activeColor}20`}
|
||||
stroke={isEditing ? "#f59e0b" : activeColor}
|
||||
fill={isEditing && !isSriovActive ? "#f59e0b20" : `${activeColor}20`}
|
||||
stroke={isEditing && !isSriovActive ? "#f59e0b" : activeColor}
|
||||
strokeWidth="3"
|
||||
className="transition-all duration-300"
|
||||
/>
|
||||
@@ -115,112 +150,198 @@ export function GpuSwitchModeIndicator({
|
||||
cx="95"
|
||||
cy="50"
|
||||
r="6"
|
||||
fill={isEditing ? "#f59e0b" : activeColor}
|
||||
fill={isEditing && !isSriovActive ? "#f59e0b" : activeColor}
|
||||
className="transition-all duration-300"
|
||||
/>
|
||||
|
||||
{/* LXC Branch Line - going up-right */}
|
||||
{/* LXC Branch Line - going up-right.
|
||||
In SR-IOV mode the branch is dashed + dimmed to show that the
|
||||
target is theoretically reachable via a VF but not controlled
|
||||
by ProxMenux. */}
|
||||
<path
|
||||
d="M 109 42 L 135 20"
|
||||
fill="none"
|
||||
stroke={lxcColor}
|
||||
stroke={isSriovActive ? dimmedColor : lxcColor}
|
||||
strokeWidth={isLxcActive ? "3.5" : "2"}
|
||||
strokeLinecap="round"
|
||||
strokeDasharray={isSriovActive ? "3 3" : undefined}
|
||||
className="transition-all duration-300"
|
||||
/>
|
||||
|
||||
{/* VM Branch Line - going down-right */}
|
||||
{/* VM Branch Line - going down-right (dashed/dimmed in SR-IOV). */}
|
||||
<path
|
||||
d="M 109 58 L 135 80"
|
||||
fill="none"
|
||||
stroke={vmColor}
|
||||
stroke={isSriovActive ? dimmedColor : vmColor}
|
||||
strokeWidth={isVmActive ? "3.5" : "2"}
|
||||
strokeLinecap="round"
|
||||
strokeDasharray={isSriovActive ? "3 3" : undefined}
|
||||
className="transition-all duration-300"
|
||||
/>
|
||||
|
||||
{/* LXC Container Icon - Server/Stack icon */}
|
||||
<g transform="translate(138, 2)">
|
||||
{/* Container box */}
|
||||
<rect
|
||||
x="0"
|
||||
y="0"
|
||||
width="32"
|
||||
height="28"
|
||||
rx="4"
|
||||
fill={isLxcActive ? `${lxcColor}25` : "transparent"}
|
||||
stroke={lxcColor}
|
||||
strokeWidth={isLxcActive ? "2.5" : "1.5"}
|
||||
className="transition-all duration-300"
|
||||
/>
|
||||
{/* Container layers/lines */}
|
||||
<line x1="0" y1="10" x2="32" y2="10" stroke={lxcColor} strokeWidth={isLxcActive ? "1.5" : "1"} className="transition-all duration-300" />
|
||||
<line x1="0" y1="19" x2="32" y2="19" stroke={lxcColor} strokeWidth={isLxcActive ? "1.5" : "1"} className="transition-all duration-300" />
|
||||
{/* Status dots */}
|
||||
<circle cx="7" cy="5" r="2" fill={lxcColor} className="transition-all duration-300" />
|
||||
<circle cx="7" cy="14.5" r="2" fill={lxcColor} className="transition-all duration-300" />
|
||||
<circle cx="7" cy="23.5" r="2" fill={lxcColor} className="transition-all duration-300" />
|
||||
</g>
|
||||
{/* SR-IOV in-line connector + badge (only when mode === 'sriov').
|
||||
A horizontal line from the switch node leads to a pill-shaped
|
||||
badge carrying the "SR-IOV ×N" label. Placed on the GPU's
|
||||
baseline to visually read as an in-line extension, not as a
|
||||
third branch. */}
|
||||
{isSriovActive && (
|
||||
<>
|
||||
<line
|
||||
x1="109"
|
||||
y1="50"
|
||||
x2="130"
|
||||
y2="50"
|
||||
stroke={sriovColor}
|
||||
strokeWidth="3"
|
||||
strokeLinecap="round"
|
||||
className="transition-all duration-300"
|
||||
/>
|
||||
<rect
|
||||
x="132"
|
||||
y="40"
|
||||
width="60"
|
||||
height="20"
|
||||
rx="10"
|
||||
fill={`${sriovColor}25`}
|
||||
stroke={sriovColor}
|
||||
strokeWidth="2"
|
||||
className="transition-all duration-300"
|
||||
/>
|
||||
<text
|
||||
x="162"
|
||||
y="54"
|
||||
textAnchor="middle"
|
||||
fill={sriovColor}
|
||||
className="text-[11px] font-bold transition-all duration-300"
|
||||
style={{ fontFamily: 'system-ui, sans-serif' }}
|
||||
>
|
||||
{sriovBadgeText}
|
||||
</text>
|
||||
</>
|
||||
)}
|
||||
|
||||
{/* LXC Container Icon - full-size view, rendered only when SR-IOV is not active. */}
|
||||
{!isSriovActive && (
|
||||
<g transform="translate(138, 2)">
|
||||
<rect
|
||||
x="0"
|
||||
y="0"
|
||||
width="32"
|
||||
height="28"
|
||||
rx="4"
|
||||
fill={isLxcActive ? `${lxcColor}25` : "transparent"}
|
||||
stroke={lxcColor}
|
||||
strokeWidth={isLxcActive ? "2.5" : "1.5"}
|
||||
className="transition-all duration-300"
|
||||
/>
|
||||
<line x1="0" y1="10" x2="32" y2="10" stroke={lxcColor} strokeWidth={isLxcActive ? "1.5" : "1"} className="transition-all duration-300" />
|
||||
<line x1="0" y1="19" x2="32" y2="19" stroke={lxcColor} strokeWidth={isLxcActive ? "1.5" : "1"} className="transition-all duration-300" />
|
||||
<circle cx="7" cy="5" r="2" fill={lxcColor} className="transition-all duration-300" />
|
||||
<circle cx="7" cy="14.5" r="2" fill={lxcColor} className="transition-all duration-300" />
|
||||
<circle cx="7" cy="23.5" r="2" fill={lxcColor} className="transition-all duration-300" />
|
||||
</g>
|
||||
)}
|
||||
{/* SR-IOV: compact dimmed LXC glyph so the geometry stays recognizable
|
||||
but it's clearly not the active target. */}
|
||||
{isSriovActive && (
|
||||
<g transform="translate(138, 6)" opacity="0.35">
|
||||
<rect x="0" y="0" width="20" height="18" rx="3" fill="transparent" stroke={dimmedColor} strokeWidth="1.5" />
|
||||
<line x1="0" y1="6" x2="20" y2="6" stroke={dimmedColor} strokeWidth="1" />
|
||||
<line x1="0" y1="12" x2="20" y2="12" stroke={dimmedColor} strokeWidth="1" />
|
||||
</g>
|
||||
)}
|
||||
|
||||
{/* LXC Label */}
|
||||
<text
|
||||
x="188"
|
||||
y="22"
|
||||
textAnchor="start"
|
||||
fill={lxcColor}
|
||||
className={cn(
|
||||
"transition-all duration-300",
|
||||
isLxcActive ? "text-[14px] font-bold" : "text-[12px] font-medium"
|
||||
)}
|
||||
style={{ fontFamily: 'system-ui, sans-serif' }}
|
||||
>
|
||||
LXC
|
||||
</text>
|
||||
{!isSriovActive && (
|
||||
<text
|
||||
x="188"
|
||||
y="22"
|
||||
textAnchor="start"
|
||||
fill={lxcColor}
|
||||
className={cn(
|
||||
"transition-all duration-300",
|
||||
isLxcActive ? "text-[14px] font-bold" : "text-[12px] font-medium"
|
||||
)}
|
||||
style={{ fontFamily: 'system-ui, sans-serif' }}
|
||||
>
|
||||
LXC
|
||||
</text>
|
||||
)}
|
||||
{isSriovActive && (
|
||||
<text
|
||||
x="162"
|
||||
y="16"
|
||||
fill={dimmedColor}
|
||||
className="text-[9px] font-medium"
|
||||
style={{ fontFamily: 'system-ui, sans-serif' }}
|
||||
>
|
||||
LXC
|
||||
</text>
|
||||
)}
|
||||
|
||||
{/* VM Monitor Icon */}
|
||||
<g transform="translate(138, 65)">
|
||||
{/* Monitor screen */}
|
||||
<rect
|
||||
x="2"
|
||||
y="0"
|
||||
width="28"
|
||||
height="18"
|
||||
rx="3"
|
||||
fill={isVmActive ? `${vmColor}25` : "transparent"}
|
||||
stroke={vmColor}
|
||||
strokeWidth={isVmActive ? "2.5" : "1.5"}
|
||||
className="transition-all duration-300"
|
||||
/>
|
||||
{/* Screen inner/shine */}
|
||||
<rect
|
||||
x="5"
|
||||
y="3"
|
||||
width="22"
|
||||
height="12"
|
||||
rx="1"
|
||||
fill={isVmActive ? `${vmColor}30` : `${vmColor}10`}
|
||||
className="transition-all duration-300"
|
||||
/>
|
||||
{/* Monitor stand */}
|
||||
<line x1="16" y1="18" x2="16" y2="24" stroke={vmColor} strokeWidth={isVmActive ? "2.5" : "1.5"} strokeLinecap="round" className="transition-all duration-300" />
|
||||
{/* Monitor base */}
|
||||
<line x1="8" y1="24" x2="24" y2="24" stroke={vmColor} strokeWidth={isVmActive ? "2.5" : "1.5"} strokeLinecap="round" className="transition-all duration-300" />
|
||||
</g>
|
||||
{/* VM Monitor Icon - active view */}
|
||||
{!isSriovActive && (
|
||||
<g transform="translate(138, 65)">
|
||||
<rect
|
||||
x="2"
|
||||
y="0"
|
||||
width="28"
|
||||
height="18"
|
||||
rx="3"
|
||||
fill={isVmActive ? `${vmColor}25` : "transparent"}
|
||||
stroke={vmColor}
|
||||
strokeWidth={isVmActive ? "2.5" : "1.5"}
|
||||
className="transition-all duration-300"
|
||||
/>
|
||||
<rect
|
||||
x="5"
|
||||
y="3"
|
||||
width="22"
|
||||
height="12"
|
||||
rx="1"
|
||||
fill={isVmActive ? `${vmColor}30` : `${vmColor}10`}
|
||||
className="transition-all duration-300"
|
||||
/>
|
||||
<line x1="16" y1="18" x2="16" y2="24" stroke={vmColor} strokeWidth={isVmActive ? "2.5" : "1.5"} strokeLinecap="round" className="transition-all duration-300" />
|
||||
<line x1="8" y1="24" x2="24" y2="24" stroke={vmColor} strokeWidth={isVmActive ? "2.5" : "1.5"} strokeLinecap="round" className="transition-all duration-300" />
|
||||
</g>
|
||||
)}
|
||||
{/* SR-IOV: compact dimmed VM monitor glyph, mirror of the LXC glyph. */}
|
||||
{isSriovActive && (
|
||||
<g transform="translate(138, 72)" opacity="0.35">
|
||||
<rect x="0" y="0" width="20" height="13" rx="2" fill="transparent" stroke={dimmedColor} strokeWidth="1.5" />
|
||||
<line x1="10" y1="13" x2="10" y2="17" stroke={dimmedColor} strokeWidth="1.5" strokeLinecap="round" />
|
||||
<line x1="5" y1="17" x2="15" y2="17" stroke={dimmedColor} strokeWidth="1.5" strokeLinecap="round" />
|
||||
</g>
|
||||
)}
|
||||
|
||||
{/* VM Label */}
|
||||
<text
|
||||
x="188"
|
||||
y="84"
|
||||
textAnchor="start"
|
||||
fill={vmColor}
|
||||
className={cn(
|
||||
"transition-all duration-300",
|
||||
isVmActive ? "text-[14px] font-bold" : "text-[12px] font-medium"
|
||||
)}
|
||||
style={{ fontFamily: 'system-ui, sans-serif' }}
|
||||
>
|
||||
VM
|
||||
</text>
|
||||
{!isSriovActive && (
|
||||
<text
|
||||
x="188"
|
||||
y="84"
|
||||
textAnchor="start"
|
||||
fill={vmColor}
|
||||
className={cn(
|
||||
"transition-all duration-300",
|
||||
isVmActive ? "text-[14px] font-bold" : "text-[12px] font-medium"
|
||||
)}
|
||||
style={{ fontFamily: 'system-ui, sans-serif' }}
|
||||
>
|
||||
VM
|
||||
</text>
|
||||
)}
|
||||
{isSriovActive && (
|
||||
<text
|
||||
x="162"
|
||||
y="82"
|
||||
fill={dimmedColor}
|
||||
className="text-[9px] font-medium"
|
||||
style={{ fontFamily: 'system-ui, sans-serif' }}
|
||||
>
|
||||
VM
|
||||
</text>
|
||||
)}
|
||||
</svg>
|
||||
|
||||
{/* Status Text - Large like GPU name */}
|
||||
@@ -228,22 +349,41 @@ export function GpuSwitchModeIndicator({
|
||||
<span
|
||||
className={cn(
|
||||
"text-base font-semibold transition-all duration-300",
|
||||
isLxcActive ? "text-blue-500" : isVmActive ? "text-purple-500" : "text-muted-foreground"
|
||||
isSriovActive
|
||||
? "text-teal-500"
|
||||
: isLxcActive
|
||||
? "text-blue-500"
|
||||
: isVmActive
|
||||
? "text-purple-500"
|
||||
: "text-muted-foreground"
|
||||
)}
|
||||
>
|
||||
{isLxcActive
|
||||
? "Ready for LXC containers"
|
||||
: isVmActive
|
||||
? "Ready for VM passthrough"
|
||||
: "Mode unknown"}
|
||||
{isSriovActive
|
||||
? "SR-IOV active"
|
||||
: isLxcActive
|
||||
? "Ready for LXC containers"
|
||||
: isVmActive
|
||||
? "Ready for VM passthrough"
|
||||
: "Mode unknown"}
|
||||
</span>
|
||||
<span className="text-sm text-muted-foreground">
|
||||
{isLxcActive
|
||||
? "Native driver active"
|
||||
: isVmActive
|
||||
? "VFIO-PCI driver active"
|
||||
: "No driver detected"}
|
||||
{isSriovActive
|
||||
? "Virtual Functions managed externally"
|
||||
: isLxcActive
|
||||
? "Native driver active"
|
||||
: isVmActive
|
||||
? "VFIO-PCI driver active"
|
||||
: "No driver detected"}
|
||||
</span>
|
||||
{isSriovActive && sriovInfo && (
|
||||
<span className="text-xs font-mono text-teal-600/80 dark:text-teal-400/80">
|
||||
{sriovInfo.role === "vf"
|
||||
? `Virtual Function${sriovInfo.physfn ? ` · parent PF ${sriovInfo.physfn}` : ""}`
|
||||
: sriovInfo.vfCount !== undefined
|
||||
? `1 PF + ${sriovInfo.vfCount} VF${sriovInfo.vfCount === 1 ? "" : "s"}${sriovInfo.totalvfs ? ` / ${sriovInfo.totalvfs} max` : ""}`
|
||||
: null}
|
||||
</span>
|
||||
)}
|
||||
{hasChanged && (
|
||||
<span className="text-sm text-amber-500 font-medium animate-pulse">
|
||||
Change pending...
|
||||
|
||||
@@ -293,11 +293,16 @@ export default function Hardware() {
|
||||
const [showSwitchModeModal, setShowSwitchModeModal] = useState(false)
|
||||
const [switchModeParams, setSwitchModeParams] = useState<{ gpuSlot: string; targetMode: "lxc" | "vm" } | null>(null)
|
||||
|
||||
// Determine GPU mode based on driver (vfio-pci = VM, native driver = LXC)
|
||||
const getGpuSwitchMode = (gpu: GPU): "lxc" | "vm" | "unknown" => {
|
||||
// Determine GPU mode based on driver (vfio-pci = VM, native driver = LXC).
|
||||
// SR-IOV short-circuits the driver check: if the GPU is either a VF or a
|
||||
// PF with active VFs, the slot is in a hardware-partitioned state that
|
||||
// ProxMenux does not manage from the UI, so it's surfaced as its own mode.
|
||||
const getGpuSwitchMode = (gpu: GPU): "lxc" | "vm" | "sriov" | "unknown" => {
|
||||
if (gpu.sriov_role === "vf" || gpu.sriov_role === "pf-active") return "sriov"
|
||||
|
||||
const driver = gpu.pci_driver?.toLowerCase() || ""
|
||||
const kernelModule = gpu.pci_kernel_module?.toLowerCase() || ""
|
||||
|
||||
|
||||
// Check driver first
|
||||
if (driver === "vfio-pci") return "vm"
|
||||
if (driver === "nvidia" || driver === "amdgpu" || driver === "radeon" || driver === "i915" || driver === "xe" || driver === "nouveau" || driver === "mgag200") return "lxc"
|
||||
@@ -940,7 +945,11 @@ return (
|
||||
Switch Mode
|
||||
</span>
|
||||
<div className="flex items-center gap-2">
|
||||
{editingSwitchModeGpu === fullSlot ? (
|
||||
{getGpuSwitchMode(gpu) === "sriov" ? (
|
||||
// SR-IOV: edit controls hidden — the state is
|
||||
// hardware-managed and not togglable from here.
|
||||
null
|
||||
) : editingSwitchModeGpu === fullSlot ? (
|
||||
<>
|
||||
<button
|
||||
className="h-7 px-3 text-xs rounded-md border border-border bg-background hover:bg-muted transition-colors text-muted-foreground"
|
||||
@@ -981,6 +990,16 @@ return (
|
||||
isEditing={editingSwitchModeGpu === fullSlot}
|
||||
pendingMode={pendingSwitchModes[gpu.slot] || null}
|
||||
onToggle={(e) => handleSwitchModeToggle(gpu, e)}
|
||||
sriovInfo={
|
||||
gpu.sriov_role === "vf" || gpu.sriov_role === "pf-active"
|
||||
? {
|
||||
role: gpu.sriov_role,
|
||||
physfn: gpu.sriov_physfn,
|
||||
vfCount: gpu.sriov_vf_count,
|
||||
totalvfs: gpu.sriov_totalvfs,
|
||||
}
|
||||
: undefined
|
||||
}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
@@ -1053,8 +1072,104 @@ return (
|
||||
<Loader2 className="h-8 w-8 animate-spin mx-auto mb-2 text-primary" />
|
||||
<p className="text-sm">Loading real-time data...</p>
|
||||
</div>
|
||||
) : selectedGPU.sriov_role === "vf" ? (
|
||||
// SR-IOV Virtual Function: per-VF telemetry is not exposed
|
||||
// by the kernel, so we skip the metrics panel and show
|
||||
// identity + consumer + a link back to the parent PF.
|
||||
<div className="space-y-4">
|
||||
<div className="rounded-lg bg-teal-500/10 p-4 border border-teal-500/20">
|
||||
<div className="flex gap-3">
|
||||
<div className="flex-shrink-0">
|
||||
<svg className="h-5 w-5 text-teal-500" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
|
||||
<path strokeLinecap="round" strokeLinejoin="round" d="M13 10V3L4 14h7v7l9-11h-7z" />
|
||||
</svg>
|
||||
</div>
|
||||
<div className="flex-1">
|
||||
<h4 className="text-sm font-semibold text-teal-500 mb-1">SR-IOV Virtual Function</h4>
|
||||
<p className="text-sm text-muted-foreground">
|
||||
This device is a Virtual Function spawned by a Physical Function. Per-VF
|
||||
telemetry (temperature, utilization, memory) is not exposed by the kernel —
|
||||
open the parent PF to see aggregate GPU metrics.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="rounded-lg border border-border/50 p-4 space-y-3">
|
||||
<h3 className="text-sm font-semibold text-muted-foreground mb-1 uppercase tracking-wide">
|
||||
Virtual Function Detail
|
||||
</h3>
|
||||
<div className="flex justify-between items-center">
|
||||
<span className="text-sm text-muted-foreground">Parent Physical Function</span>
|
||||
{selectedGPU.sriov_physfn ? (
|
||||
<button
|
||||
className="font-mono text-sm text-teal-500 hover:underline"
|
||||
onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
const pf = hardwareData?.gpus?.find(
|
||||
(g) => g.slot === selectedGPU.sriov_physfn
|
||||
)
|
||||
if (pf) setSelectedGPU(pf)
|
||||
}}
|
||||
>
|
||||
{selectedGPU.sriov_physfn}
|
||||
</button>
|
||||
) : (
|
||||
<span className="font-mono text-sm text-muted-foreground">unknown</span>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex justify-between items-center">
|
||||
<span className="text-sm text-muted-foreground">Current Driver</span>
|
||||
<span className="font-mono text-sm">
|
||||
{selectedGPU.pci_driver || "none"}
|
||||
</span>
|
||||
</div>
|
||||
<div className="flex justify-between items-start">
|
||||
<span className="text-sm text-muted-foreground">Consumer</span>
|
||||
<div className="text-sm text-right">
|
||||
{realtimeGPUData?.sriov_consumer ? (
|
||||
<span className={cn(
|
||||
"inline-flex items-center gap-1.5 px-2 py-0.5 rounded-md text-xs font-medium",
|
||||
realtimeGPUData.sriov_consumer.running
|
||||
? "bg-teal-500/10 text-teal-500"
|
||||
: "bg-muted text-muted-foreground"
|
||||
)}>
|
||||
<span className="h-1.5 w-1.5 rounded-full bg-current" />
|
||||
{realtimeGPUData.sriov_consumer.type.toUpperCase()} {realtimeGPUData.sriov_consumer.id}
|
||||
{realtimeGPUData.sriov_consumer.name && ` · ${realtimeGPUData.sriov_consumer.name}`}
|
||||
{` · ${realtimeGPUData.sriov_consumer.running ? "running" : "stopped"}`}
|
||||
</span>
|
||||
) : (
|
||||
<span className="text-muted-foreground italic">unused</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
) : realtimeGPUData?.has_monitoring_tool === true ? (
|
||||
<>
|
||||
{selectedGPU.sriov_role === "pf-active" && (
|
||||
// SR-IOV Physical Function: metrics below are the
|
||||
// aggregate of the whole GPU (PF + all active VFs).
|
||||
// Flag it explicitly so the reader interprets numbers
|
||||
// correctly.
|
||||
<div className="rounded-lg bg-teal-500/10 p-3 border border-teal-500/20">
|
||||
<div className="flex items-center gap-2 flex-wrap">
|
||||
<span className="inline-flex items-center gap-1.5 px-2 py-0.5 rounded-md bg-teal-500/15 text-teal-500 text-xs font-semibold">
|
||||
<span className="h-1.5 w-1.5 rounded-full bg-teal-500" />
|
||||
SR-IOV active
|
||||
</span>
|
||||
<span className="text-sm text-muted-foreground">
|
||||
Metrics below reflect the Physical Function (aggregate across
|
||||
{" "}
|
||||
<span className="font-semibold text-foreground">
|
||||
{realtimeGPUData?.sriov_vf_count ?? selectedGPU.sriov_vf_count ?? "N"}
|
||||
</span>
|
||||
{" "}VFs).
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
<div className="flex items-center gap-2 text-xs text-muted-foreground">
|
||||
<div className="h-2 w-2 rounded-full bg-green-500 animate-pulse" />
|
||||
<span>Updating every 3 seconds</span>
|
||||
@@ -1285,6 +1400,67 @@ return (
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
{selectedGPU.sriov_role === "pf-active" &&
|
||||
Array.isArray(realtimeGPUData?.sriov_vfs) &&
|
||||
realtimeGPUData.sriov_vfs.length > 0 && (
|
||||
// Per-VF table: one row per virtfn* under the PF.
|
||||
// Driver is color-coded (teal native / purple vfio-pci
|
||||
// / muted fallback) and consumer pills go green when
|
||||
// the guest is currently running, muted otherwise.
|
||||
<div>
|
||||
<h3 className="text-sm font-semibold text-muted-foreground mb-3 uppercase tracking-wide">
|
||||
Virtual Functions
|
||||
</h3>
|
||||
<div className="rounded-lg border border-border/50 divide-y divide-border/30 overflow-hidden">
|
||||
{realtimeGPUData.sriov_vfs.map((vf: any) => (
|
||||
<div
|
||||
key={vf.bdf}
|
||||
className="flex items-center justify-between gap-3 px-4 py-2.5 hover:bg-muted/30 transition-colors"
|
||||
>
|
||||
<span className="font-mono text-xs text-foreground">{vf.bdf}</span>
|
||||
<div className="flex items-center gap-3 flex-wrap justify-end">
|
||||
<span
|
||||
className={cn(
|
||||
"font-mono text-[11px] px-2 py-0.5 rounded",
|
||||
vf.driver === "vfio-pci"
|
||||
? "bg-purple-500/10 text-purple-500"
|
||||
: vf.driver === "i915" ||
|
||||
vf.driver === "xe" ||
|
||||
vf.driver === "amdgpu" ||
|
||||
vf.driver === "radeon" ||
|
||||
vf.driver === "nvidia"
|
||||
? "bg-teal-500/10 text-teal-500"
|
||||
: "bg-muted text-muted-foreground"
|
||||
)}
|
||||
>
|
||||
{vf.driver || "unbound"}
|
||||
</span>
|
||||
{vf.consumer ? (
|
||||
<span
|
||||
className={cn(
|
||||
"inline-flex items-center gap-1.5 px-2 py-0.5 rounded-md text-xs font-medium",
|
||||
vf.consumer.running
|
||||
? "bg-green-500/10 text-green-500"
|
||||
: "bg-muted text-muted-foreground"
|
||||
)}
|
||||
>
|
||||
<span className="h-1.5 w-1.5 rounded-full bg-current" />
|
||||
{vf.consumer.type.toUpperCase()} {vf.consumer.id}
|
||||
{vf.consumer.name && (
|
||||
<span className="opacity-70">· {vf.consumer.name}</span>
|
||||
)}
|
||||
</span>
|
||||
) : (
|
||||
<span className="text-xs text-muted-foreground italic">
|
||||
unused
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
) : (findPCIDeviceForGPU(selectedGPU)?.driver === 'vfio-pci' || selectedGPU.pci_driver === 'vfio-pci') ? (
|
||||
<div className="rounded-lg bg-purple-500/10 p-4 border border-purple-500/20">
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
{
|
||||
"_description": "Verified AI models for ProxMenux notifications. Only models listed here will be shown to users. Models are tested to work with the chat/completions API format.",
|
||||
"_updated": "2026-03-20",
|
||||
|
||||
"_updated": "2026-04-19",
|
||||
"_verifier": "Refreshed with tools/ai-models-verifier (private). Re-run before each ProxMenux release to keep the list current. The verifier and ProxMenux share the same reasoning/thinking-model handlers so their verdicts stay aligned with runtime behaviour.",
|
||||
|
||||
"groq": {
|
||||
"models": [
|
||||
"llama-3.3-70b-versatile",
|
||||
@@ -12,37 +13,46 @@
|
||||
"mixtral-8x7b-32768",
|
||||
"gemma2-9b-it"
|
||||
],
|
||||
"recommended": "llama-3.3-70b-versatile"
|
||||
"recommended": "llama-3.3-70b-versatile",
|
||||
"_note": "Not yet re-verified in 2026-04 refresh — kept from previous curation. Run the verifier with a Groq key to prune deprecated entries."
|
||||
},
|
||||
|
||||
|
||||
"gemini": {
|
||||
"models": [
|
||||
"gemini-2.5-flash",
|
||||
"gemini-2.5-flash-lite",
|
||||
"gemini-2.5-pro"
|
||||
"gemini-2.5-flash",
|
||||
"gemini-3-flash-preview"
|
||||
],
|
||||
"recommended": "gemini-2.5-flash",
|
||||
"_note": "gemini-2.5-flash-lite is cheaper but may struggle with complex prompts. Use with simple/custom prompts.",
|
||||
"recommended": "gemini-2.5-flash-lite",
|
||||
"_note": "flash-lite / flash pass the verifier consistently; pro variants reject thinkingBudget=0 and are overkill for notification translation anyway. 'latest' aliases (gemini-flash-latest, gemini-flash-lite-latest) are intentionally omitted because they resolved to different models across runs and produced timeouts in some regions.",
|
||||
"_deprecated": ["gemini-2.0-flash", "gemini-2.0-flash-lite", "gemini-1.5-flash", "gemini-1.0-pro", "gemini-pro"]
|
||||
},
|
||||
|
||||
|
||||
"openai": {
|
||||
"models": [
|
||||
"gpt-4.1-nano",
|
||||
"gpt-4.1-mini",
|
||||
"gpt-4o-mini"
|
||||
"gpt-4o-mini",
|
||||
"gpt-4.1",
|
||||
"gpt-4o",
|
||||
"gpt-5-chat-latest",
|
||||
"gpt-5.4-nano",
|
||||
"gpt-5.4-mini"
|
||||
],
|
||||
"recommended": "gpt-4o-mini"
|
||||
"recommended": "gpt-4.1-nano",
|
||||
"_note": "Reasoning models (o-series, gpt-5/5.1/5.2 non-chat variants) are supported by openai_provider.py via max_completion_tokens + reasoning_effort=minimal, but not listed here by default: their latency is higher than the chat models and they do not improve translation quality for notifications. Add specific reasoning IDs to this list only if a user explicitly wants them."
|
||||
},
|
||||
|
||||
|
||||
"anthropic": {
|
||||
"models": [
|
||||
"claude-3-5-haiku-latest",
|
||||
"claude-3-5-sonnet-latest",
|
||||
"claude-3-opus-latest"
|
||||
],
|
||||
"recommended": "claude-3-5-haiku-latest"
|
||||
"recommended": "claude-3-5-haiku-latest",
|
||||
"_note": "Not re-verified in 2026-04 refresh — kept from previous curation. Add claude-4.x / claude-4.5 / claude-4.6 / claude-4.7 variants after running the verifier with an Anthropic key."
|
||||
},
|
||||
|
||||
|
||||
"openrouter": {
|
||||
"models": [
|
||||
"meta-llama/llama-3.3-70b-instruct",
|
||||
@@ -50,14 +60,15 @@
|
||||
"meta-llama/llama-3.1-8b-instruct",
|
||||
"anthropic/claude-3.5-haiku",
|
||||
"anthropic/claude-3.5-sonnet",
|
||||
"google/gemini-flash-2.5-flash-lite",
|
||||
"google/gemini-flash-1.5",
|
||||
"openai/gpt-4o-mini",
|
||||
"mistralai/mistral-7b-instruct",
|
||||
"mistralai/mixtral-8x7b-instruct"
|
||||
],
|
||||
"recommended": "meta-llama/llama-3.3-70b-instruct"
|
||||
"recommended": "meta-llama/llama-3.3-70b-instruct",
|
||||
"_note": "Not re-verified in 2026-04 refresh. google/gemini-flash-2.5-flash-lite was malformed in the previous entry and has been replaced with google/gemini-flash-1.5."
|
||||
},
|
||||
|
||||
|
||||
"ollama": {
|
||||
"_note": "Ollama models are local, we don't filter them. User manages their own models.",
|
||||
"models": [],
|
||||
|
||||
@@ -30,6 +30,23 @@ class GeminiProvider(AIProvider):
|
||||
'gemini-1.0-pro',
|
||||
'gemini-pro',
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _has_thinking_mode(model: str) -> bool:
|
||||
"""True for Gemini variants that enable "thinking" by default.
|
||||
|
||||
Gemini 2.5+ and 3.x Pro/Flash models spend output tokens on
|
||||
internal reasoning before emitting the final answer. With a small
|
||||
max_tokens budget (≤250) that consumes the whole allowance and
|
||||
leaves an empty reply. For the short translate/explain use case
|
||||
in ProxMenux we want direct output, so we disable thinking for
|
||||
these. Lite variants (flash-lite) do NOT have thinking enabled
|
||||
and are safe to leave alone.
|
||||
"""
|
||||
m = model.lower()
|
||||
if 'lite' in m:
|
||||
return False
|
||||
return m.startswith('gemini-2.5') or m.startswith('gemini-3')
|
||||
|
||||
def list_models(self) -> List[str]:
|
||||
"""List available Gemini models that support generateContent.
|
||||
@@ -118,6 +135,18 @@ class GeminiProvider(AIProvider):
|
||||
url = f"{self.API_BASE}/{self.model}:generateContent?key={self.api_key}"
|
||||
|
||||
# Gemini uses a specific format with contents array
|
||||
gen_config = {
|
||||
'maxOutputTokens': max_tokens,
|
||||
'temperature': 0.3,
|
||||
}
|
||||
|
||||
# Disable thinking on 2.5+ / 3.x pro & flash models so the limited
|
||||
# output budget actually produces visible text. thinkingBudget=0
|
||||
# is the official switch for this; lite variants and legacy
|
||||
# models don't need (and ignore) the field.
|
||||
if self._has_thinking_mode(self.model):
|
||||
gen_config['thinkingConfig'] = {'thinkingBudget': 0}
|
||||
|
||||
payload = {
|
||||
'systemInstruction': {
|
||||
'parts': [{'text': system_prompt}]
|
||||
@@ -128,10 +157,7 @@ class GeminiProvider(AIProvider):
|
||||
'parts': [{'text': user_message}]
|
||||
}
|
||||
],
|
||||
'generationConfig': {
|
||||
'maxOutputTokens': max_tokens,
|
||||
'temperature': 0.3,
|
||||
}
|
||||
'generationConfig': gen_config,
|
||||
}
|
||||
|
||||
headers = {
|
||||
|
||||
@@ -37,23 +37,49 @@ class OpenAIProvider(AIProvider):
|
||||
|
||||
# Recommended models for chat (in priority order)
|
||||
RECOMMENDED_PREFIXES = ['gpt-4o-mini', 'gpt-4o', 'gpt-4-turbo', 'gpt-4', 'gpt-3.5-turbo']
|
||||
|
||||
@staticmethod
|
||||
def _is_reasoning_model(model: str) -> bool:
|
||||
"""True for OpenAI reasoning models (o-series + non-chat gpt-5+).
|
||||
|
||||
These use a stricter API contract than chat models:
|
||||
- Must use ``max_completion_tokens`` instead of ``max_tokens``
|
||||
- ``temperature`` is not accepted (only the default is supported)
|
||||
|
||||
Chat-optimized variants (``gpt-5-chat-latest``,
|
||||
``gpt-5.1-chat-latest``, etc.) keep the classic contract and are
|
||||
NOT flagged here.
|
||||
"""
|
||||
m = model.lower()
|
||||
# o1, o3, o4, o5 ... (o<digit>...)
|
||||
if len(m) >= 2 and m[0] == 'o' and m[1].isdigit():
|
||||
return True
|
||||
# gpt-5, gpt-5-mini, gpt-5.1, gpt-5.2-pro ... EXCEPT *-chat-latest
|
||||
if m.startswith('gpt-5') and '-chat' not in m:
|
||||
return True
|
||||
return False
|
||||
|
||||
def list_models(self) -> List[str]:
|
||||
"""List available OpenAI models for chat completions.
|
||||
|
||||
Filters to only chat-capable models, excluding:
|
||||
- Embedding models
|
||||
- Audio/speech models (whisper, tts)
|
||||
- Image models (dall-e)
|
||||
- Instruct models (different API)
|
||||
- Legacy models (babbage, davinci, etc.)
|
||||
|
||||
"""List available models for chat completions.
|
||||
|
||||
Two modes:
|
||||
- Official OpenAI (no custom base_url): restrict to GPT chat models,
|
||||
excluding embedding/whisper/tts/dall-e/instruct/legacy variants.
|
||||
- OpenAI-compatible endpoint (LiteLLM, MLX, LM Studio, vLLM,
|
||||
LocalAI, Ollama-proxy, etc.): the "gpt" substring check is
|
||||
dropped so user-served models (e.g. ``mlx-community/Llama-3.1-8B``,
|
||||
``Qwen3-32B``, ``mistralai/...``) show up. EXCLUDED_PATTERNS
|
||||
still applies — embeddings/whisper/tts aren't chat-capable on
|
||||
any backend.
|
||||
|
||||
Returns:
|
||||
List of model IDs suitable for chat completions.
|
||||
"""
|
||||
if not self.api_key:
|
||||
return []
|
||||
|
||||
|
||||
is_custom_endpoint = bool(self.base_url)
|
||||
|
||||
try:
|
||||
# Determine models URL from base_url if set
|
||||
if self.base_url:
|
||||
@@ -63,42 +89,46 @@ class OpenAIProvider(AIProvider):
|
||||
models_url = f"{base}/models"
|
||||
else:
|
||||
models_url = self.DEFAULT_MODELS_URL
|
||||
|
||||
|
||||
req = urllib.request.Request(
|
||||
models_url,
|
||||
headers={'Authorization': f'Bearer {self.api_key}'},
|
||||
method='GET'
|
||||
)
|
||||
|
||||
|
||||
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||
data = json.loads(resp.read().decode('utf-8'))
|
||||
|
||||
|
||||
models = []
|
||||
for model in data.get('data', []):
|
||||
model_id = model.get('id', '')
|
||||
if not model_id:
|
||||
continue
|
||||
|
||||
|
||||
model_lower = model_id.lower()
|
||||
|
||||
# Must be a GPT model
|
||||
if 'gpt' not in model_lower:
|
||||
|
||||
# Official OpenAI: restrict to GPT chat models. Custom
|
||||
# endpoints serve arbitrarily named models, so this
|
||||
# substring check would drop every valid result there.
|
||||
if not is_custom_endpoint and 'gpt' not in model_lower:
|
||||
continue
|
||||
|
||||
# Exclude non-chat models
|
||||
|
||||
# Exclude non-chat models on every backend.
|
||||
if any(pattern in model_lower for pattern in self.EXCLUDED_PATTERNS):
|
||||
continue
|
||||
|
||||
|
||||
models.append(model_id)
|
||||
|
||||
# Sort with recommended models first
|
||||
|
||||
# Sort with recommended models first (only meaningful for OpenAI
|
||||
# official; on custom endpoints the prefixes rarely match, so
|
||||
# entries fall through to alphabetical order, which is fine).
|
||||
def sort_key(m):
|
||||
m_lower = m.lower()
|
||||
for i, prefix in enumerate(self.RECOMMENDED_PREFIXES):
|
||||
if m_lower.startswith(prefix):
|
||||
return (i, m)
|
||||
return (len(self.RECOMMENDED_PREFIXES), m)
|
||||
|
||||
|
||||
return sorted(models, key=sort_key)
|
||||
except Exception as e:
|
||||
print(f"[OpenAIProvider] Failed to list models: {e}")
|
||||
@@ -133,17 +163,35 @@ class OpenAIProvider(AIProvider):
|
||||
"""
|
||||
if not self.api_key:
|
||||
raise AIProviderError("API key required for OpenAI")
|
||||
|
||||
|
||||
payload = {
|
||||
'model': self.model,
|
||||
'messages': [
|
||||
{'role': 'system', 'content': system_prompt},
|
||||
{'role': 'user', 'content': user_message},
|
||||
],
|
||||
'max_tokens': max_tokens,
|
||||
'temperature': 0.3,
|
||||
}
|
||||
|
||||
|
||||
# Reasoning models (o1/o3/o4/gpt-5*, excluding *-chat-latest) use a
|
||||
# different parameter contract: max_completion_tokens instead of
|
||||
# max_tokens, and no temperature field. Sending the classic chat
|
||||
# parameters to them produces HTTP 400 Bad Request.
|
||||
#
|
||||
# They also spend output budget on internal reasoning by default,
|
||||
# which empties the user-visible reply when max_tokens is small
|
||||
# (like the ~200 we use for notifications). reasoning_effort
|
||||
# 'minimal' keeps that internal reasoning to a minimum so the
|
||||
# entire budget is available for the translation, which is
|
||||
# exactly what this pipeline wants. OpenAI documents 'minimal',
|
||||
# 'low', 'medium', 'high' — 'minimal' is the right setting for a
|
||||
# straightforward translate+explain task.
|
||||
if self._is_reasoning_model(self.model):
|
||||
payload['max_completion_tokens'] = max_tokens
|
||||
payload['reasoning_effort'] = 'minimal'
|
||||
else:
|
||||
payload['max_tokens'] = max_tokens
|
||||
payload['temperature'] = 0.3
|
||||
|
||||
headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': f'Bearer {self.api_key}',
|
||||
|
||||
@@ -220,10 +220,20 @@ def get_provider_models():
|
||||
|
||||
# Get all models from provider API
|
||||
api_models = ai_provider.list_models()
|
||||
|
||||
|
||||
# OpenAI with a custom base URL means an OpenAI-compatible endpoint
|
||||
# (LiteLLM, MLX, LM Studio, vLLM, LocalAI, Ollama-proxy...). The
|
||||
# verified_ai_models.json list only contains official OpenAI IDs
|
||||
# (gpt-4o-mini etc.), so intersecting against it would strip every
|
||||
# model the user actually serves. Treat the custom-endpoint case
|
||||
# like Ollama: return whatever the endpoint advertises, no filter.
|
||||
is_openai_compat = (provider == 'openai' and bool(openai_base_url))
|
||||
|
||||
if not api_models:
|
||||
# API failed, fall back to verified list only
|
||||
if verified_models:
|
||||
# API failed, fall back to verified list only (but not for
|
||||
# custom endpoints — we don't know what the endpoint serves,
|
||||
# so "gpt-4o-mini" as a fallback would be misleading).
|
||||
if verified_models and not is_openai_compat:
|
||||
models = sorted(verified_models)
|
||||
return jsonify({
|
||||
'success': True,
|
||||
@@ -232,27 +242,38 @@ def get_provider_models():
|
||||
'message': f'{len(models)} verified models (API unavailable)'
|
||||
})
|
||||
return jsonify({
|
||||
'success': False,
|
||||
'models': [],
|
||||
'message': 'Could not retrieve models. Check your API key.'
|
||||
'success': False,
|
||||
'models': [],
|
||||
'message': 'Could not retrieve models. Check your API key and endpoint URL.'
|
||||
})
|
||||
|
||||
|
||||
if is_openai_compat:
|
||||
# Custom OpenAI-compatible endpoint: surface every model the
|
||||
# endpoint reports. No verified-list intersection.
|
||||
models = sorted(api_models)
|
||||
return jsonify({
|
||||
'success': True,
|
||||
'models': models,
|
||||
'recommended': models[0] if models else '',
|
||||
'message': f'Found {len(models)} models on custom endpoint'
|
||||
})
|
||||
|
||||
# Filter: only models that are BOTH in API and verified list
|
||||
if verified_models:
|
||||
api_models_set = set(api_models)
|
||||
filtered_models = [m for m in verified_models if m in api_models_set]
|
||||
|
||||
|
||||
if not filtered_models:
|
||||
# No intersection - maybe verified list is outdated
|
||||
# Return verified list anyway (will fail on use if truly unavailable)
|
||||
filtered_models = list(verified_models)
|
||||
|
||||
|
||||
# Sort with recommended first
|
||||
def sort_key(m):
|
||||
if m == recommended:
|
||||
return (0, m)
|
||||
return (1, m)
|
||||
|
||||
|
||||
models = sorted(filtered_models, key=sort_key)
|
||||
else:
|
||||
# No verified list for this provider, return all from API
|
||||
|
||||
@@ -6151,6 +6151,211 @@ def get_network_hardware_info(pci_slot):
|
||||
|
||||
return net_info
|
||||
|
||||
def _get_sriov_info(slot):
|
||||
"""Return SR-IOV role for a PCI slot via sysfs.
|
||||
|
||||
Reads /sys/bus/pci/devices/<BDF>/ for:
|
||||
- physfn symlink → slot is a Virtual Function; link target is its PF
|
||||
- sriov_numvfs → active VF count if slot is a Physical Function
|
||||
- sriov_totalvfs → maximum VFs this PF can spawn
|
||||
|
||||
Returns a dict ready to merge into the GPU object, or {} on any error.
|
||||
The 'role' key uses the same vocabulary as _pci_sriov_role in the
|
||||
bash helpers (pci_passthrough_helpers.sh): vf | pf-active | pf-idle | none.
|
||||
"""
|
||||
try:
|
||||
bdf = slot if slot.startswith('0000:') else f'0000:{slot}'
|
||||
base = f'/sys/bus/pci/devices/{bdf}'
|
||||
if not os.path.isdir(base):
|
||||
return {}
|
||||
|
||||
physfn = os.path.join(base, 'physfn')
|
||||
if os.path.islink(physfn):
|
||||
parent = os.path.basename(os.path.realpath(physfn))
|
||||
return {
|
||||
'sriov_role': 'vf',
|
||||
'sriov_physfn': parent,
|
||||
}
|
||||
|
||||
totalvfs_path = os.path.join(base, 'sriov_totalvfs')
|
||||
if not os.path.isfile(totalvfs_path):
|
||||
return {'sriov_role': 'none'}
|
||||
|
||||
try:
|
||||
totalvfs = int((open(totalvfs_path).read() or '0').strip() or 0)
|
||||
except (ValueError, OSError):
|
||||
totalvfs = 0
|
||||
if totalvfs <= 0:
|
||||
return {'sriov_role': 'none'}
|
||||
|
||||
try:
|
||||
numvfs = int((open(os.path.join(base, 'sriov_numvfs')).read() or '0').strip() or 0)
|
||||
except (ValueError, OSError):
|
||||
numvfs = 0
|
||||
|
||||
return {
|
||||
'sriov_role': 'pf-active' if numvfs > 0 else 'pf-idle',
|
||||
'sriov_vf_count': numvfs,
|
||||
'sriov_totalvfs': totalvfs,
|
||||
}
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
|
||||
def _sriov_list_vfs_of_pf(pf_bdf):
|
||||
"""Return sorted list of VF BDFs that belong to a Physical Function.
|
||||
Reads /sys/bus/pci/devices/<PF>/virtfn<N> symlinks (one per VF).
|
||||
"""
|
||||
try:
|
||||
pf_full = pf_bdf if pf_bdf.startswith('0000:') else f'0000:{pf_bdf}'
|
||||
base = f'/sys/bus/pci/devices/{pf_full}'
|
||||
if not os.path.isdir(base):
|
||||
return []
|
||||
# virtfn links are numbered (virtfn0, virtfn1, ...) and point to the VF.
|
||||
entries = sorted(glob.glob(f'{base}/virtfn*'),
|
||||
key=lambda p: int(re.search(r'virtfn(\d+)', p).group(1))
|
||||
if re.search(r'virtfn(\d+)', p) else 0)
|
||||
return [os.path.basename(os.path.realpath(p)) for p in entries]
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
|
||||
def _sriov_pci_driver(bdf):
|
||||
"""Return the current driver bound to a PCI BDF, '' if unbound."""
|
||||
try:
|
||||
link = f'/sys/bus/pci/devices/{bdf}/driver'
|
||||
if os.path.islink(link):
|
||||
return os.path.basename(os.path.realpath(link))
|
||||
except Exception:
|
||||
pass
|
||||
return ''
|
||||
|
||||
|
||||
def _sriov_pci_render_node(bdf):
|
||||
"""If the device exposes a DRM render node, return '/dev/dri/renderDX'.
|
||||
LXC containers consume GPUs through these nodes, so this lets us
|
||||
cross-reference an LXC's `dev<N>: /dev/dri/renderD<N>` config line
|
||||
back to a specific VF.
|
||||
"""
|
||||
try:
|
||||
drm_dir = f'/sys/bus/pci/devices/{bdf}/drm'
|
||||
if not os.path.isdir(drm_dir):
|
||||
return ''
|
||||
for name in sorted(os.listdir(drm_dir)):
|
||||
if name.startswith('renderD'):
|
||||
return f'/dev/dri/{name}'
|
||||
except Exception:
|
||||
pass
|
||||
return ''
|
||||
|
||||
|
||||
def _sriov_guest_running(guest_type, gid):
|
||||
"""Best-effort status check. Returns True if running, False otherwise."""
|
||||
try:
|
||||
cmd = ['qm' if guest_type == 'vm' else 'pct', 'status', str(gid)]
|
||||
r = subprocess.run(cmd, capture_output=True, text=True, timeout=3)
|
||||
return 'running' in (r.stdout or '').lower()
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _sriov_find_guest_consumer(bdf):
    """Find the VM or LXC that consumes a given VF (or PF) on the host.

    VMs: scan /etc/pve/qemu-server/*.conf for a `hostpci<N>: ` line that
    references the BDF (short or full form, possibly alongside other
    ids separated by ';' and trailing options after ',').
    LXCs: resolve the BDF to its DRM render node (if any) and scan
    /etc/pve/lxc/*.conf for `dev<N>:` or `lxc.mount.entry:` lines that
    reference that node.

    Only the live part of each config (everything before the first
    ``[section]`` header) is inspected, so settings that exist solely in a
    snapshot or pending section are not reported as a current consumer.
    VMs are checked before LXCs, and files are visited in sorted order;
    the first match wins.

    Args:
        bdf: PCI address, short (``BB:DD.F``) or ``0000:``-prefixed.

    Returns:
        ``{'type': 'vm' | 'lxc', 'id': str, 'name': str, 'running': bool}``
        or ``None`` when no guest config references the device.
    """
    def _live_section(text):
        # Proxmox appends "[<snapshot>]" / "[PENDING]" sections after the
        # active settings; keep only what precedes the first header line.
        header = re.search(r'^\[', text, re.MULTILINE)
        return text[:header.start()] if header else text

    def _scan(conf_glob, pattern, guest_type, name_key):
        # Return the first guest whose live config matches `pattern`.
        for conf in sorted(glob.glob(conf_glob)):
            try:
                with open(conf, 'r') as f:
                    text = _live_section(f.read())
            except OSError:
                continue
            if not re.search(pattern, text, re.MULTILINE):
                continue
            guest_id = os.path.basename(conf)[:-5]  # strip '.conf'
            nm = re.search(rf'^{name_key}:\s*(\S+)', text, re.MULTILINE)
            return {
                'type': guest_type,
                'id': guest_id,
                'name': nm.group(1) if nm else '',
                'running': _sriov_guest_running(guest_type, guest_id),
            }
        return None

    short_bdf = bdf[5:] if bdf.startswith('0000:') else bdf
    full_bdf = bdf if bdf.startswith('0000:') else f'0000:{bdf}'

    # ── VM scan ──
    # The negative lookbehind stops the short BDF from matching as the tail
    # of an address in another PCI domain (e.g. 0001:00:02.1 vs 00:02.1).
    vm_pattern = (
        rf'^hostpci\d+:\s*[^\n]*(?<![0-9A-Fa-f:])'
        rf'(?:0000:)?{re.escape(short_bdf)}(?:[,;\s]|$)'
    )
    try:
        hit = _scan('/etc/pve/qemu-server/*.conf', vm_pattern, 'vm', 'name')
        if hit:
            return hit
    except Exception:
        # Best-effort: a failing VM scan must not prevent the LXC scan.
        pass

    # ── LXC scan (via render node) ──
    render_node = _sriov_pci_render_node(full_bdf)
    if render_node:
        lxc_pattern = (
            rf'^(?:dev\d+|lxc\.mount\.entry):\s*[^\n]*'
            rf'{re.escape(render_node)}(?:[,;\s]|$)'
        )
        try:
            hit = _scan('/etc/pve/lxc/*.conf', lxc_pattern, 'lxc', 'hostname')
            if hit:
                return hit
        except Exception:
            # Best-effort: report "no consumer" rather than fail the request.
            pass

    return None
|
||||
|
||||
|
||||
def _sriov_enrich_detail(gpu):
|
||||
"""On-demand enrichment for the GPU detail modal.
|
||||
|
||||
For a PF with active VFs, populates gpu['sriov_vfs'] with per-VF driver
|
||||
and consumer info. For a VF, populates gpu['sriov_consumer'] with the
|
||||
guest (if any) currently referencing it. Heavier than _get_sriov_info()
|
||||
because it scans guest configs, so it is NOT called from the hardware
|
||||
snapshot path — only from the realtime endpoint.
|
||||
"""
|
||||
role = gpu.get('sriov_role')
|
||||
slot = gpu.get('slot', '')
|
||||
if not slot:
|
||||
return
|
||||
full_bdf = slot if slot.startswith('0000:') else f'0000:{slot}'
|
||||
|
||||
if role == 'pf-active':
|
||||
vf_list = []
|
||||
for vf_bdf in _sriov_list_vfs_of_pf(full_bdf):
|
||||
vf_list.append({
|
||||
'bdf': vf_bdf,
|
||||
'driver': _sriov_pci_driver(vf_bdf) or '',
|
||||
'render_node': _sriov_pci_render_node(vf_bdf) or '',
|
||||
'consumer': _sriov_find_guest_consumer(vf_bdf),
|
||||
})
|
||||
gpu['sriov_vfs'] = vf_list
|
||||
elif role == 'vf':
|
||||
gpu['sriov_consumer'] = _sriov_find_guest_consumer(full_bdf)
|
||||
|
||||
|
||||
def get_gpu_info():
|
||||
"""Detect and return information about GPUs in the system"""
|
||||
gpus = []
|
||||
@@ -6196,7 +6401,11 @@ def get_gpu_info():
|
||||
gpu['pci_class'] = pci_info.get('class', '')
|
||||
gpu['pci_driver'] = pci_info.get('driver', '')
|
||||
gpu['pci_kernel_module'] = pci_info.get('kernel_module', '')
|
||||
|
||||
|
||||
sriov_fields = _get_sriov_info(slot)
|
||||
if sriov_fields:
|
||||
gpu.update(sriov_fields)
|
||||
|
||||
# detailed_info = get_detailed_gpu_info(gpu) # Removed this call here
|
||||
# gpu.update(detailed_info) # It will be called later in api_gpu_realtime
|
||||
|
||||
@@ -10010,7 +10219,12 @@ def api_gpu_realtime(slot):
|
||||
pass
|
||||
detailed_info = get_detailed_gpu_info(gpu)
|
||||
gpu.update(detailed_info)
|
||||
|
||||
|
||||
# SR-IOV detail is only relevant when the modal is actually open,
|
||||
# so we build it on demand here (not in get_gpu_info) to avoid
|
||||
# scanning every guest config on the hardware snapshot path.
|
||||
_sriov_enrich_detail(gpu)
|
||||
|
||||
# Extract only the monitoring-related fields
|
||||
realtime_data = {
|
||||
'has_monitoring_tool': gpu.get('has_monitoring_tool', False),
|
||||
@@ -10035,9 +10249,17 @@ def api_gpu_realtime(slot):
|
||||
# Added for NVIDIA/AMD specific engine info if available
|
||||
'engine_encoder': gpu.get('engine_encoder'),
|
||||
'engine_decoder': gpu.get('engine_decoder'),
|
||||
'driver_version': gpu.get('driver_version') # Added driver_version
|
||||
'driver_version': gpu.get('driver_version'), # Added driver_version
|
||||
# SR-IOV modal detail (populated only when the GPU is an SR-IOV
|
||||
# Physical Function with active VFs, or a Virtual Function).
|
||||
'sriov_role': gpu.get('sriov_role'),
|
||||
'sriov_physfn': gpu.get('sriov_physfn'),
|
||||
'sriov_vf_count': gpu.get('sriov_vf_count'),
|
||||
'sriov_totalvfs': gpu.get('sriov_totalvfs'),
|
||||
'sriov_vfs': gpu.get('sriov_vfs'),
|
||||
'sriov_consumer': gpu.get('sriov_consumer'),
|
||||
}
|
||||
|
||||
|
||||
return jsonify(realtime_data)
|
||||
except Exception as e:
|
||||
# print(f"[v0] Error getting real-time GPU data: {e}")
|
||||
|
||||
@@ -882,7 +882,15 @@ class JournalWatcher:
|
||||
smart_health = self._quick_smart_health(resolved)
|
||||
if smart_health != 'FAILED':
|
||||
return
|
||||
|
||||
|
||||
# ── Persist observation (before the cooldown gate) ──
|
||||
# The 24h cooldown below only suppresses RE-notification; the
|
||||
# per-disk observations history must reflect every genuine
|
||||
# detection. The DB UPSERT dedups same-signature events via
|
||||
# occurrence_count, so calling this on every match is safe.
|
||||
# Aligns with the parallel path in HealthMonitor._check_disks_optimized.
|
||||
self._record_disk_io_observation(resolved, msg)
|
||||
|
||||
# ── Gate 2: 24-hour dedup per device ──
|
||||
# Check both in-memory cache AND the DB (user dismiss clears DB cooldowns).
|
||||
# If user dismissed the error, _clear_disk_io_cooldown() removed the DB
|
||||
@@ -986,6 +994,55 @@ class JournalWatcher:
|
||||
except Exception:
|
||||
return 'UNKNOWN'
|
||||
|
||||
def _record_disk_io_observation(self, resolved: str, msg: str):
|
||||
"""Persist a kernel-journal I/O error as a disk observation.
|
||||
|
||||
Signature classification mirrors HealthMonitor._make_io_obs_signature
|
||||
so observations from the real-time journal watcher and the periodic
|
||||
dmesg scan dedup into the same row (via the UPSERT on
|
||||
disk_registry_id + error_type + error_signature).
|
||||
"""
|
||||
try:
|
||||
from health_persistence import health_persistence
|
||||
|
||||
m = msg.lower()
|
||||
if re.search(r'exception\s+emask|emask\s+0x|revalidation failed|'
|
||||
r'hard resetting link|serror.*badcrc|comreset|'
|
||||
r'link is slow|status.*drdy', m):
|
||||
family = 'ata_connection_error'
|
||||
elif re.search(r'i/o error|blk_update_request|medium error|sense key', m):
|
||||
family = 'block_io_error'
|
||||
elif re.search(r'failed command|fpdma queued', m):
|
||||
family = 'ata_failed_command'
|
||||
else:
|
||||
family = 'generic'
|
||||
|
||||
# Best-effort serial lookup so the observation survives device
|
||||
# renames (ata8 -> sdh, USB reconnects, etc.).
|
||||
serial = None
|
||||
try:
|
||||
sm = subprocess.run(
|
||||
['smartctl', '-i', f'/dev/{resolved}'],
|
||||
capture_output=True, text=True, timeout=3)
|
||||
if sm.returncode in (0, 4):
|
||||
for line in sm.stdout.split('\n'):
|
||||
if 'Serial Number' in line or 'Serial number' in line:
|
||||
serial = line.split(':')[-1].strip()
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
health_persistence.record_disk_observation(
|
||||
device_name=resolved,
|
||||
serial=serial,
|
||||
error_type='io_error',
|
||||
error_signature=f'io_{resolved}_{family}',
|
||||
raw_message=f'/dev/{resolved}: {msg.strip()[:200]}',
|
||||
severity='critical',
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"[JournalWatcher] Error recording disk io observation: {e}")
|
||||
|
||||
def _record_smartd_observation(self, title: str, message: str):
|
||||
"""Extract device info from a smartd system-mail and record as disk observation."""
|
||||
try:
|
||||
|
||||
@@ -190,6 +190,34 @@ export interface GPU {
|
||||
}>
|
||||
has_monitoring_tool?: boolean
|
||||
note?: string
|
||||
// SR-IOV state — populated from sysfs (physfn symlink + sriov_{num,total}vfs).
|
||||
// "vf" — this slot is a Virtual Function; sriov_physfn is its PF.
|
||||
// "pf-active" — this slot is a Physical Function with sriov_vf_count > 0.
|
||||
// "pf-idle" — SR-IOV capable PF but no VFs currently active.
|
||||
// "none" — not involved in SR-IOV.
|
||||
sriov_role?: "vf" | "pf-active" | "pf-idle" | "none"
|
||||
sriov_physfn?: string
|
||||
sriov_vf_count?: number
|
||||
sriov_totalvfs?: number
|
||||
// SR-IOV detail — only populated by the /api/gpu/<slot>/realtime endpoint
|
||||
// when the modal is open (scanning guest configs is too expensive for the
|
||||
// hardware snapshot path).
|
||||
sriov_vfs?: SriovVfDetail[] // filled when role === "pf-active"
|
||||
sriov_consumer?: SriovConsumer | null // filled when role === "vf"
|
||||
}
|
||||
|
||||
// One entry of GPU.sriov_vfs: a single Virtual Function of an active PF.
export interface SriovVfDetail {
|
||||
bdf: string // e.g. "0000:00:02.1"
|
||||
driver: string // current kernel driver (i915, vfio-pci, ...)
|
||||
render_node: string // "" when the VF does not expose a DRM node
|
||||
consumer: SriovConsumer | null // which guest is using this VF, if any
|
||||
}
|
||||
|
||||
// Guest (VM or LXC) that references an SR-IOV function; used by
// GPU.sriov_consumer and SriovVfDetail.consumer.
export interface SriovConsumer {
|
||||
type: "vm" | "lxc"
|
||||
id: string // VMID or CTID
|
||||
name: string // VM name / LXC hostname
|
||||
running: boolean
|
||||
}
|
||||
|
||||
export interface DiskHardwareInfo {
|
||||
|
||||
+156
-4
@@ -1,4 +1,159 @@
|
||||
# <img src="https://raw.githubusercontent.com/MacRimi/ProxMenux/main/images/logo.png" alt="ProxMenux logo" width="40"/> ProxMenux v1.2.0 — *AI-Enhanced Monitoring*
|
||||
|
||||
## 2026-04-20
|
||||
|
||||
### New version ProxMenux v1.2.1 — *SR-IOV Awareness & GPU Passthrough Hardening*
|
||||
|
||||
Targeted release on top of **v1.2.0** addressing three community-reported areas: complete SR-IOV awareness across the GPU/PCI subsystem, robust handling of GPU + audio companions during passthrough attach and detach (Intel iGPU with chipset audio, discrete cards with HDMI audio, mixed-GPU VMs), and compatibility fixes for AI notification providers (OpenAI-compatible custom endpoints such as LiteLLM/MLX/LM Studio, OpenAI reasoning models, and Gemini 2.5+/3.x thinking models). Also includes quality-of-life improvements in the NVIDIA installer, the disk health monitor, and the LXC lifecycle helpers used by the passthrough wizards.
|
||||
|
||||
---
|
||||
|
||||
## 🎛️ SR-IOV Awareness Across the GPU Subsystem
|
||||
|
||||
Intel `i915-sriov-dkms` and AMD MxGPU split a GPU's Physical Function (PF) into Virtual Functions (VFs) that can be assigned independently to LXCs and VMs. Previously ProxMenux had zero SR-IOV awareness: it treated VFs and PFs identically, which could rewrite `vfio.conf` with the PF's vendor:device ID, collapse the VF tree on the next boot, and leave users unable to start their guests. Every path that could disrupt an active VF tree has been audited and hardened.
|
||||
|
||||
### Detection helpers
|
||||
- New `_pci_is_vf`, `_pci_has_active_vfs`, `_pci_sriov_role`, `_pci_sriov_filter_array` in `scripts/global/pci_passthrough_helpers.sh`
|
||||
- HTTP/JSON equivalents in the Flask GPU route — the Monitor UI reads VF/PF state directly from sysfs (`physfn`, `sriov_totalvfs`, `sriov_numvfs`, `virtfn*`)
|
||||
|
||||
### Pre-start hook (`gpu_hook_guard_helpers.sh`)
|
||||
The VM pre-start guard now recognises Virtual Functions. Both the slot-only syntax branch (which used to iterate every function of the slot and demand `vfio-pci` everywhere) and the full-BDF branch skip VFs, so Proxmox can perform its per-VF vfio-pci rebind as usual. The false "GPU passthrough device is not ready" block on SR-IOV VMs is gone.
|
||||
|
||||
### Mode-switch scripts refuse SR-IOV operations
|
||||
`switch_gpu_mode.sh`, `switch_gpu_mode_direct.sh`, `add_gpu_vm.sh`, `add_gpu_lxc.sh`, `vm_creator.sh`, `synology.sh`, `zimaos.sh` and `add_controller_nvme_vm.sh` all reject VFs and PFs with active VFs before touching host configuration. A clear "SR-IOV Configuration Detected" dialog explains the situation. For wizards invoked mid-flow (VM creators) the message is delivered through `whiptail` so it interrupts cleanly, followed by a per-device `msg_warn` line for the log trail.
|
||||
|
||||
### New "SR-IOV active" state in the Monitor UI
|
||||
The GPU card in the Hardware page gains a third visual state with a dedicated teal colour, an in-line `SR-IOV ×N` pill (or `SR-IOV VF` for a Virtual Function), and dashed/faded LXC and VM branches. The Edit button is hidden because the state is hardware-managed.
|
||||
|
||||

|
||||
|
||||
### Modal dashboard for SR-IOV GPUs
|
||||
Opening the modal for a Physical Function with active VFs now shows:
|
||||
- Aggregate-metrics banner ("Metrics below reflect the Physical Function, aggregate across N VFs")
|
||||
- Normal GPU real-time telemetry for the PF
|
||||
- A **Virtual Functions** table, one row per VF, with the current driver (`i915`, `vfio-pci`, unbound) and the specific VM or LXC that consumes it, including running/stopped state — consumers are discovered by cross-referencing `hostpci` entries and `/dev/dri/renderD<N>` mount lines against the VF's BDF and DRM render node
|
||||
|
||||
Opening the modal for a Virtual Function shows its parent PF (clickable to navigate back to the PF's modal), current driver, and consumer.
|
||||
|
||||
### VM Conflict Policy popup no longer fires for SR-IOV VFs
|
||||
The regex in `detect_affected_vms_for_selected` matched the slot (`00:02`) against VMs that had a VF (`00:02.1`) assigned, producing a confusing "Keep GPU in VM config" dialog. With the SR-IOV gate upstream, the flow never reaches that code path for SR-IOV slots.
|
||||
|
||||
---
|
||||
|
||||
## 🔊 GPU + Audio Passthrough — Full Lifecycle Hardening
|
||||
|
||||
A round of fixes around how GPU passthrough handles its audio companion device. Previously, only the `.1` sibling of a discrete GPU was picked up automatically; Intel iGPU passthrough to a VM — where the audio lives separately on the chipset at `00:1f.3` and not at `00:02.1` — was silently skipped. On detach, the old `sed` that wiped hostpci lines by slot substring could also remove an unrelated GPU whose BDF happened to contain the search slot as a substring (e.g. slot `00:02` matching inside `0000:02:00.0`). Both paths are now robust.
|
||||
|
||||
### iGPU audio-companion checklist on attach
|
||||
`add_gpu_vm.sh::detect_optional_gpu_audio` keeps the auto-include fast path for the classic `.1` sibling (discrete NVIDIA / AMD with HDMI audio on the card). When no `.1` audio exists, the script now:
|
||||
- Scans sysfs for every PCI audio controller on the host
|
||||
- Skips anything already covered by the GPU's IOMMU group
|
||||
- Asks the user via a `_pmx_checklist` (`dialog` in standalone mode, `whiptail` in wizard mode called from `vm_creator`/`synology`/`zimaos`) which audio controllers to pass through alongside the GPU
|
||||
- Displays each entry with its current host driver (`snd_hda_intel`, `snd_hda_codec_*`, etc.) so the decision is informed
|
||||
- Defaults to **none** — the user actively opts in
|
||||
|
||||
### Orphan audio cascade on detach
|
||||
When the user picks "Remove GPU from VM config" during a mode switch, the scripts now follow up with a targeted cleanup:
|
||||
- `switch_gpu_mode.sh`, `switch_gpu_mode_direct.sh` and `add_gpu_vm.sh::cleanup_vm_config` (source-VM cleanup on the "move GPU" flow) all call the shared helper `_vm_list_orphan_audio_hostpci`
|
||||
- The helper uses a two-pass scan of the VM config: pass 1 records slot bases of display/3D hostpci entries; pass 2 classifies audio entries and **skips any audio whose slot still has a display sibling in the same VM** — protecting the HDMI audio of other dGPUs left in the VM
|
||||
- Previously the bare substring match would have flagged NVIDIA's `02:00.1` as orphan when detaching an Intel iGPU at `00:02.0`
|
||||
- The interactive switch flow confirms removals with a `dialog` checklist (default ON). The web variant auto-removes without prompting — the runner has no good way to render a checklist — and logs every BDF it touched
|
||||
|
||||
### vfio.conf cascade extension
|
||||
For each audio removed by the cascade, the switch-mode scripts now check whether its BDF is still referenced by any other VM via `_pci_bdf_in_any_vm`. If nothing else uses it, the `vendor:device` is appended to `SELECTED_IOMMU_IDS` before the `/etc/modprobe.d/vfio.conf` update runs. That closes the loop for the Intel iGPU case: `8086:51c8` (PCH HD Audio) is now pulled from `vfio.conf` alongside `8086:46a3` (iGPU) when both leave VM mode and no other VM references them. If another VM still uses the audio, the ID is deliberately kept — no breaking side effects on other VMs. `add_gpu_vm.sh` does NOT extend the cleanup in the *move* flow, because the GPU is still in use elsewhere and its IDs must remain.
|
||||
|
||||
### Precise hostpci removal regex
|
||||
Every inline `sed` used to detach a GPU from a VM config previously matched the slot as a free substring:
|
||||
```
|
||||
/^hostpci[0-9]+:.*${slot}/d
|
||||
```
|
||||
For `slot=00:02` that pattern matches the substring inside `0000:02:00.0` (an unrelated NVIDIA dGPU at slot `02:00`) and would wipe both cards. The fix anchors the match to the real BDF shape:
|
||||
```
|
||||
/^hostpci[0-9]+:[[:space:]]*(0000:)?${slot}\.[0-7]([,[:space:]]|$)/d
|
||||
```
|
||||
Applied in `switch_gpu_mode.sh`, `switch_gpu_mode_direct.sh` and `add_gpu_vm.sh::cleanup_vm_config`. The awk-based helper in `vm_storage_helpers.sh::_remove_pci_slot_from_vm_config` (used by the NVMe wizards) already used the correct pattern and did not need changes.
|
||||
|
||||
---
|
||||
|
||||
## 🤖 AI Provider Compatibility — OpenAI-Compatible, Reasoning & Thinking Models
|
||||
|
||||
Three coordinated fixes that unblock model categories previously rejected by the notification enhancement pipeline.
|
||||
|
||||
### OpenAI-compatible endpoints
|
||||
This affects LiteLLM, MLX, LM Studio, vLLM, LocalAI and Ollama-proxy setups. The provider's `list_models()` used to require `"gpt"` in every model name, so local setups serving `mlx-community/...`, `Qwen3-...`, `mistralai/...` saw an empty model list. When a Custom Base URL is set, the `"gpt"` substring check is now skipped and `EXCLUDED_PATTERNS` (embeddings, whisper, tts, dall-e) is the only filter. The Flask route layer also stops intersecting the result against `verified_ai_models.json` for custom endpoints — the verified list only describes OpenAI's official model IDs and was erasing every local model the user actually served.
|
||||
|
||||
### OpenAI reasoning models
|
||||
`o1`, `o3`, `o3-mini`, `o4-mini`, `gpt-5`, `gpt-5-mini`, `gpt-5.1`, `gpt-5.2-pro`, `gpt-5.4-nano`, etc. (excluding the `*-chat-latest` variants) use a stricter API contract: `max_completion_tokens` instead of `max_tokens`, no `temperature`. Sending the classic chat parameters produced HTTP 400 Bad Request for every one of them. A detector in `openai_provider.py` now branches the payload accordingly and sets `reasoning_effort: "minimal"` — by default these models spend their output budget on internal reasoning and return an empty reply for the short notification-translation request.
|
||||
|
||||
### Gemini 2.5+ / 3.x thinking models
|
||||
`gemini-2.5-flash`, `2.5-pro`, `gemini-3-pro-preview`, `gemini-3.1-pro-preview`, etc. have internal "thinking" enabled by default. With the small token budget used for notification enrichment (≤250 tokens), the thinking budget consumed the entire allowance and the model returned empty output with `finishReason: MAX_TOKENS`. `gemini_provider.py` now sets `thinkingConfig.thinkingBudget: 0` for non-`lite` variants of 2.5+ and 3.x, so the available tokens go to the user-visible response. Lite variants (no thinking enabled) are untouched.
|
||||
|
||||
---
|
||||
|
||||
## 📋 Verified AI Models Refresh
|
||||
|
||||
`AppImage/config/verified_ai_models.json` refreshed for the providers re-tested against live APIs. The new private maintenance tool (kept out of the AppImage) re-runs a standardised translate+explain test against every model each provider advertises, classifies pass / warn / fail, and prints a ready-to-paste JSON snippet. Re-run before each ProxMenux release to keep the list current.
|
||||
|
||||
| Provider | New recommended | Notes |
|
||||
|----------|-----------------|-------|
|
||||
| **OpenAI** | `gpt-4.1-nano` | `gpt-4.1-nano`, `gpt-4.1-mini`, `gpt-4o-mini`, `gpt-4.1`, `gpt-4o`, `gpt-5-chat-latest`, plus `gpt-5.4-nano` / `gpt-5.4-mini` from 2026-03. Dated snapshots and legacy models excluded. Reasoning models supported by code but not listed by default — slower / costlier without improving notification quality |
|
||||
| **Gemini** | `gemini-2.5-flash-lite` | `gemini-2.5-flash-lite`, `gemini-2.5-flash` (works now), `gemini-3-flash-preview`. `latest` aliases intentionally omitted — resolved to different models across runs and produced timeouts in some regions. Pro variants reject `thinkingBudget=0` and are overkill for notification translation |
|
||||
| Groq / Anthropic / OpenRouter | *unchanged* | Marked with a `_note` — will be re-verified as soon as keys are available |
|
||||
|
||||
---
|
||||
|
||||
## 🩺 Disk Health Monitor — Observation Persistence in the Journal Watcher
|
||||
|
||||
A latent bug in `notification_events.py::_check_disk_io` meant real-time kernel I/O errors caught by the journal watcher were surfaced as notifications but never written to the permanent per-disk observations table. In practice the parallel periodic dmesg scan usually recorded the observation shortly after, but under timing edge cases (stale dmesg window, service restart right after the error, buffer rotation) the observation could go missing.
|
||||
|
||||
The journal watcher now records the observation before the 24h notification cooldown gate, using the same family-based signature classification (`io_<disk>_ata_connection_error`, `io_<disk>_block_io_error`, `io_<disk>_ata_failed_command`) as the periodic scan. Both paths now deduplicate into the same row via the UPSERT in `record_disk_observation`, so occurrence counts are accurate regardless of which detector fired first.
|
||||
|
||||
---
|
||||
|
||||
## 🔧 NVIDIA Installer Polish
|
||||
|
||||
### `lsmod` race condition silenced
|
||||
During reinstall, the module-unload verification in `unload_nvidia_modules` produced spurious `lsmod: ERROR: could not open '/sys/module/nvidia_uvm/holders'` errors because `lsmod` reads `/proc/modules` and then opens each module's `holders/` directory, which disappears transiently while the module is being removed. The check now reads `/proc/modules` directly and inserts short sleeps to let the kernel finalise the unload before re-verifying. Applied in the same spirit to the four other `lsmod` call sites in the script.
|
||||
|
||||
### Dialog → whiptail in the LXC update flow
|
||||
The "Insufficient Disk Space" message in `update_lxc_nvidia` and the "Update NVIDIA in LXC Containers" confirmation now use `whiptail`-style dialogs consistent with the rest of the in-flow messaging, avoiding the visual break that `dialog --msgbox` caused when rendered mid-sequence in the container-update phase.
|
||||
|
||||
---
|
||||
|
||||
## 🧵 LXC Lifecycle Helper — Timeout-Safe Stop
|
||||
|
||||
A plain `pct stop` can hang indefinitely when the container has a stale lock from a previous aborted operation, when processes inside (Plex, Jellyfin, databases) ignore TERM and fall into uninterruptible-sleep while the GPU they were using is yanked out, or when `pct shutdown --timeout` is not enforced by pct itself. Field reports of 5+ min waits during GPU mode switches made this a real UX hazard.
|
||||
|
||||
New shared helper `_pmx_stop_lxc <ctid> [log_file]` in `pci_passthrough_helpers.sh`:
|
||||
1. Returns 0 immediately if the container is not running
|
||||
2. Best-effort `pct unlock` (silent on failure) — most containers aren't actually locked; we only care about the cases where they are
|
||||
3. `pct shutdown --forceStop 1 --timeout 30` wrapped in an external `timeout 45` so we never wait longer than that for the graceful phase, even if pct stalls on backend I/O
|
||||
4. Verifies actual status via `pct status` — pct can return non-zero while the container is in fact stopped
|
||||
5. If still running, `pct stop` wrapped in `timeout 60`. Verify again
|
||||
6. Returns 1 only if the container is truly stuck after ~107 s total — the wizard moves on instead of hanging
|
||||
|
||||
Wired into the three GPU-mode paths that stop LXCs during a switch: `switch_gpu_mode.sh`, `switch_gpu_mode_direct.sh`, and `add_gpu_vm.sh::cleanup_lxc_configs`.
|
||||
|
||||
---
|
||||
|
||||
## ⚙️ `add_gpu_vm.sh` Reboot Prompt Stability
|
||||
|
||||
The final "Reboot Required" prompt of the GPU-to-VM assignment wizard was triggering spurious reboots in certain menu-chain invocations (`menu` → `main_menu` → `hw_grafics_menu` → `add_gpu_vm`). With the `_pmx_yesno` helper it sometimes returned exit 0 without the user having actually confirmed, calling `reboot` immediately. With a bare `read` in its place the process would get SIGTTIN-suspended when the menu chain detached the script from the terminal's foreground process group, leaving `[N]+ Stopped menu` on the parent shell with no chance to answer.
|
||||
|
||||
The prompt now uses `whiptail --yesno` invoked directly (the pattern verified to work reliably in that menu chain) and inserts a `Press Enter to continue ... read -r` pause between the "Yes" answer and the actual `reboot` call — so an accidental Enter on the confirm button cannot trigger an immediate reboot without a visible confirmation step first.
|
||||
|
||||
---
|
||||
|
||||
### 🙏 Thanks
|
||||
|
||||
Thank you to the users who reported the SR-IOV, LiteLLM/MLX and GPU + audio cases — these improvements exist because of detailed, reproducible reports. Feel free to keep reporting issues or suggesting improvements 🙌.
|
||||
|
||||
---
|
||||
|
||||
|
||||
## 2026-04-17
|
||||
|
||||
### New version ProxMenux v1.2.0 — *AI-Enhanced Monitoring*
|
||||
|
||||
|
||||

|
||||
|
||||
@@ -245,9 +400,6 @@ This release also consolidates significant work on the core ProxMenux scripts.
|
||||
- **Hardware & graphics menu** restructured for consistency with the rest of ProxMenux
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
## 2026-03-14
|
||||
|
||||
### New version v1.1.9 — *Helper Scripts Catalog Rebuilt*
|
||||
|
||||
@@ -16,7 +16,8 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under this license:
|
||||
1. Attribution: You must give appropriate credit to the original author (MacRimi).
|
||||
1. Attribution: You must give appropriate credit to the original author (MacRimi)
|
||||
and to all contributors involved in the development of the project.
|
||||
2. Copyleft: If you remix, transform, or build upon ProxMenux, you must
|
||||
distribute your contributions under the same GPL-3.0 license.
|
||||
3. Source Code: Anyone distributing a modified version must make the
|
||||
@@ -34,4 +35,4 @@ FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING
|
||||
FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
@@ -144,10 +144,13 @@ The following dependencies are installed automatically during setup:
|
||||
| `python3` + `python3-venv` | Translation support *(Translation version only)* |
|
||||
| `googletrans` | Google Translate library *(Translation version only)* |
|
||||
|
||||
<br>
|
||||
|
||||
> **🛡️ Security Note / VirusTotal False Positive**
|
||||
> If you scan the raw installation URL on VirusTotal, you might see a 1/95 detection by heuristic engines like *Chong Lua Dao*. This is a **known false positive**. Because this script uses the standard `curl | bash` installation pattern and downloads legitimate binaries (like `jq` from its official GitHub release), overly aggressive scanners flag the *behavior*. The script is 100% open source and safe to review. You can read more about this in [Issue #162](https://github.com/MacRimi/ProxMenux/issues/162).
|
||||
|
||||
---
|
||||
|
||||
|
||||
## ⭐ Support the Project!
|
||||
If you find **ProxMenux** useful, consider giving it a ⭐ on GitHub to help others discover it!
|
||||
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
1.1.9.5
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 54 KiB |
@@ -963,13 +963,13 @@ install_translation_version() {
|
||||
cp "./menu" "$INSTALL_DIR/$MENU_SCRIPT"
|
||||
cp "./version.txt" "$LOCAL_VERSION_FILE"
|
||||
cp "./install_proxmenux.sh" "$BASE_DIR/install_proxmenux.sh"
|
||||
|
||||
|
||||
mkdir -p "$BASE_DIR/scripts"
|
||||
cp -r "./scripts/"* "$BASE_DIR/scripts/"
|
||||
chmod -R +x "$BASE_DIR/scripts/"
|
||||
chmod +x "$BASE_DIR/install_proxmenux.sh"
|
||||
msg_ok "Necessary files created."
|
||||
|
||||
|
||||
chmod +x "$INSTALL_DIR/$MENU_SCRIPT"
|
||||
|
||||
((current_step++))
|
||||
|
||||
+4683
-1423
File diff suppressed because it is too large
Load Diff
@@ -80,72 +80,39 @@ check_updates_stable() {
|
||||
if curl -fsSL "$INSTALL_URL" -o "$INSTALL_SCRIPT"; then
|
||||
chmod +x "$INSTALL_SCRIPT"
|
||||
bash "$INSTALL_SCRIPT" --update
|
||||
return 0
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
# ── Beta update check (develop branch) ────────────────────
|
||||
# ── Beta-mode update check (main + develop) ───────────────
|
||||
# When the beta program is active, check BOTH channels. The stable check
|
||||
# is delegated to check_updates_stable (same prompt, same installer). After
|
||||
# that we only need the beta-specific part: develop vs beta_version.txt.
|
||||
check_updates_beta() {
|
||||
local BETA_VERSION_URL="$REPO_DEVELOP/beta_version.txt"
|
||||
local STABLE_VERSION_URL="$REPO_MAIN/version.txt"
|
||||
local INSTALL_BETA_URL="$REPO_DEVELOP/install_proxmenux_beta.sh"
|
||||
local INSTALL_STABLE_URL="$REPO_MAIN/install_proxmenux.sh"
|
||||
local INSTALL_SCRIPT="$BASE_DIR/install_proxmenux_beta.sh"
|
||||
# 1. Stable release on main — reuse the non-beta path.
|
||||
check_updates_stable
|
||||
|
||||
# ── 1. Check if a stable release has superseded the beta ──
|
||||
# If main's version.txt exists and is newer than local beta_version.txt,
|
||||
# the beta cycle is over and we invite the user to switch to stable.
|
||||
local STABLE_VERSION BETA_LOCAL_VERSION
|
||||
STABLE_VERSION="$(curl -fsSL "$STABLE_VERSION_URL" 2>/dev/null | head -n 1)"
|
||||
BETA_LOCAL_VERSION="$(head -n 1 "$BETA_VERSION_FILE" 2>/dev/null)"
|
||||
|
||||
if [[ -n "$STABLE_VERSION" && -n "$BETA_LOCAL_VERSION" ]]; then
|
||||
# Simple string comparison is enough if versions follow semver x.y.z
|
||||
if [[ "$STABLE_VERSION" != "$BETA_LOCAL_VERSION" ]] && \
|
||||
printf '%s\n' "$BETA_LOCAL_VERSION" "$STABLE_VERSION" | sort -V | tail -1 | grep -qx "$STABLE_VERSION"; then
|
||||
|
||||
# Stable is newer — offer migration out of beta
|
||||
if whiptail --title "🎉 Stable Release Available" \
|
||||
--yesno "A stable release of ProxMenux is now available!\n\nStable version : $STABLE_VERSION\nYour beta : $BETA_LOCAL_VERSION\n\nThe beta program for this cycle is complete.\nWould you like to switch to the stable release now?\n\n(Choosing 'No' keeps you on the beta for now.)" \
|
||||
16 68; then
|
||||
|
||||
msg_warn "Switching to stable release $STABLE_VERSION ..."
|
||||
|
||||
local tmp_installer="/tmp/install_proxmenux_stable_$$.sh"
|
||||
if curl -fsSL "$INSTALL_STABLE_URL" -o "$tmp_installer"; then
|
||||
chmod +x "$tmp_installer"
|
||||
bash "$tmp_installer"
|
||||
rm -f "$tmp_installer"
|
||||
else
|
||||
msg_error "Could not download the stable installer. Try manually:"
|
||||
echo
|
||||
echo " bash -c \"\$(wget -qLO - $INSTALL_STABLE_URL)\""
|
||||
echo
|
||||
fi
|
||||
return 0
|
||||
fi
|
||||
# User chose to stay on beta — continue normally
|
||||
return 0
|
||||
fi
|
||||
fi
|
||||
|
||||
# ── 2. Check for a newer beta build on develop ─────────────
|
||||
# 2. Beta build on develop.
|
||||
[[ ! -f "$BETA_VERSION_FILE" ]] && return 0
|
||||
|
||||
local REMOTE_BETA_VERSION
|
||||
REMOTE_BETA_VERSION="$(curl -fsSL "$BETA_VERSION_URL" 2>/dev/null | head -n 1)"
|
||||
[[ -z "$REMOTE_BETA_VERSION" ]] && return 0
|
||||
[[ "$BETA_LOCAL_VERSION" = "$REMOTE_BETA_VERSION" ]] && return 0
|
||||
local REMOTE_BETA LOCAL_BETA
|
||||
REMOTE_BETA="$(curl -fsSL "$REPO_DEVELOP/beta_version.txt" 2>/dev/null | head -n 1)"
|
||||
LOCAL_BETA="$(head -n 1 "$BETA_VERSION_FILE" 2>/dev/null)"
|
||||
[[ -z "$REMOTE_BETA" || -z "$LOCAL_BETA" || "$LOCAL_BETA" = "$REMOTE_BETA" ]] && return 0
|
||||
[[ "$(printf '%s\n%s\n' "$LOCAL_BETA" "$REMOTE_BETA" | sort -V | tail -1)" = "$REMOTE_BETA" ]] || return 0
|
||||
|
||||
if whiptail --title "Beta Update Available" \
|
||||
--yesno "A new beta build is available!\n\nInstalled beta : $BETA_LOCAL_VERSION\nNew beta build : $REMOTE_BETA_VERSION\n\nThis is a pre-release build from the develop branch.\nDo you want to update now?" \
|
||||
13 64 --defaultno; then
|
||||
--yesno "A new beta build is available!\n\nInstalled beta : $LOCAL_BETA\nNew beta build : $REMOTE_BETA\n\nDo you want to update now?" \
|
||||
12 64 --defaultno; then
|
||||
|
||||
msg_warn "Updating to beta build $REMOTE_BETA_VERSION ..."
|
||||
msg_warn "Updating to beta build $REMOTE_BETA ..."
|
||||
|
||||
if curl -fsSL "$INSTALL_BETA_URL" -o "$INSTALL_SCRIPT"; then
|
||||
chmod +x "$INSTALL_SCRIPT"
|
||||
bash "$INSTALL_SCRIPT" --update
|
||||
local INSTALL_BETA_SCRIPT="$BASE_DIR/install_proxmenux_beta.sh"
|
||||
if curl -fsSL "$REPO_DEVELOP/install_proxmenux_beta.sh" -o "$INSTALL_BETA_SCRIPT"; then
|
||||
chmod +x "$INSTALL_BETA_SCRIPT"
|
||||
bash "$INSTALL_BETA_SCRIPT" --update
|
||||
return 0
|
||||
else
|
||||
msg_error "Could not download the beta installer from the develop branch."
|
||||
fi
|
||||
|
||||
@@ -138,6 +138,12 @@ if [[ -f "$vm_conf" ]]; then
|
||||
slot_has_gpu=false
|
||||
for dev in /sys/bus/pci/devices/0000:${slot}.*; do
|
||||
[[ -e "$dev" ]] || continue
|
||||
# SR-IOV: skip Virtual Functions when iterating a whole slot.
|
||||
# VFs share the slot with their PF but carry their own driver
|
||||
# state; their vfio-pci rebind is handled by Proxmox at VM
|
||||
# start. Pre-flighting them would falsely block SR-IOV setups
|
||||
# where the PF legitimately stays on the native driver.
|
||||
[[ -L "${dev}/physfn" ]] && continue
|
||||
class_hex="$(cat "$dev/class" 2>/dev/null | sed 's/^0x//')"
|
||||
[[ "${class_hex:0:2}" != "03" ]] && continue
|
||||
slot_has_gpu=true
|
||||
@@ -159,6 +165,14 @@ if [[ -f "$vm_conf" ]]; then
|
||||
details+=$'\n'"- ${id}: PCI device not found"
|
||||
continue
|
||||
fi
|
||||
# SR-IOV VF: do not pre-flight the driver. Proxmox rebinds the VF
|
||||
# to vfio-pci as part of VM start; at pre-start time the VF may
|
||||
# still be on its native driver (i915, etc.) — that is normal,
|
||||
# not an error. Blocking here would prevent every SR-IOV VF
|
||||
# passthrough from starting.
|
||||
if [[ -L "${dev_path}/physfn" ]]; then
|
||||
continue
|
||||
fi
|
||||
class_hex="$(cat "$dev_path/class" 2>/dev/null | sed 's/^0x//')"
|
||||
# Enforce vfio only for display/3D devices (PCI class 03xx).
|
||||
[[ "${class_hex:0:2}" == "03" ]] || continue
|
||||
|
||||
@@ -11,6 +11,205 @@ function _pci_is_iommu_active() {
|
||||
find /sys/kernel/iommu_groups -mindepth 1 -maxdepth 1 -type d -print -quit 2>/dev/null | grep -q .
|
||||
}
|
||||
|
||||
# Audio-companion cascade helpers (Part 2 of the SR-IOV / audio rework).
|
||||
#
|
||||
# When a GPU is detached from a VM (user chooses "Remove GPU from VM
|
||||
# config" during a mode switch), the historic sed-based cleanup only
|
||||
# removes hostpci lines that match the GPU's PCI slot (e.g. 00:02).
|
||||
# That leaves any "companion" audio that lives at a different slot —
|
||||
# typically the chipset audio at 00:1f.X, which add_gpu_vm.sh now adds
|
||||
# alongside an Intel iGPU via the checklist from Part 1 — stranded in
|
||||
# the VM config. On the next VM start, vfio-pci is no longer claiming
|
||||
# that audio device (its vendor:device was pulled from vfio.conf
|
||||
# during the switch-back) and either QEMU fails to rebind it or it
|
||||
# breaks host audio.
|
||||
#
|
||||
# _vm_list_orphan_audio_hostpci reports those stranded entries; each
|
||||
# caller uses its own UI (dialog, whiptail, hybrid_msgbox) to confirm
|
||||
# removal and then calls _vm_remove_hostpci_index per selected entry.
|
||||
|
||||
# Usage: _vm_list_orphan_audio_hostpci <vmid> <gpu_slot_base>
|
||||
# gpu_slot_base: the GPU's PCI slot WITHOUT function suffix, e.g. "00:02".
|
||||
# Output: one line per orphan entry, in the form "idx|bdf|human_name".
|
||||
# Empty output when the VM has no audio passthrough outside the GPU slot.
|
||||
#
|
||||
# A hostpci audio entry is reported as "orphan" ONLY if the same VM has
|
||||
# no display/3D-class hostpci at the same slot base. Rationale: the
|
||||
# audio at e.g. 02:00.1 is the HDMI codec of a dGPU at 02:00.0 — if
|
||||
# that dGPU is still being passed through to this VM (as a separate
|
||||
# hostpciN), the audio belongs to it and must not be touched when
|
||||
# detaching an unrelated GPU (e.g. an Intel iGPU at 00:02.0) from the
|
||||
# same VM. Without this filter we would strip the HDMI audio of every
|
||||
# other GPU in the VM, leaving them silent on next start.
|
||||
function _vm_list_orphan_audio_hostpci() {
    # Print one "idx|bdf|human_name" line for every audio-class (04xx)
    # hostpci entry of VM <vmid> that lives outside <gpu_slot> and has no
    # display-class (03xx) hostpci entry at its own slot base in the same
    # VM config. Returns 1 when an argument is missing or the VM config
    # file does not exist.
    local vmid="$1" gpu_slot="$2"
    if [[ -z "$vmid" || -z "$gpu_slot" ]]; then
        return 1
    fi
    local conf="/etc/pve/qemu-server/${vmid}.conf"
    if [[ ! -f "$conf" ]]; then
        return 1
    fi

    # First BDF-shaped token on a hostpci line, domain prefix optional.
    local bdf_re='(0000:)?[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-7]'
    local entry hit full_bdf pci_class base

    # Pass 1: remember the slot base of every display/3D passthrough entry.
    # Audio sharing one of these slots is that GPU's own HDMI/DP codec.
    local -a gpu_slots=()
    while IFS= read -r entry; do
        hit=$(grep -oE "$bdf_re" <<<"$entry" | head -n 1)
        [[ -n "$hit" ]] || continue
        case "$hit" in
            0000:*) full_bdf="$hit" ;;
            *)      full_bdf="0000:$hit" ;;
        esac
        pci_class=$(cat "/sys/bus/pci/devices/${full_bdf}/class" 2>/dev/null)
        pci_class="${pci_class#0x}"
        [[ "${pci_class:0:2}" == "03" ]] || continue
        base="${full_bdf#0000:}"
        gpu_slots+=("${base%.*}")
    done < <(grep -E '^hostpci[0-9]+:' "$conf")

    # Pass 2: report audio entries that no remaining display device owns.
    local slot_idx label sibling owned
    while IFS= read -r entry; do
        # "hostpci12: ..." -> "12"
        slot_idx="${entry%%:*}"
        slot_idx="${slot_idx#hostpci}"
        [[ -n "$slot_idx" ]] || continue

        hit=$(grep -oE "$bdf_re" <<<"$entry" | head -n 1)
        [[ -n "$hit" ]] || continue
        case "$hit" in
            0000:*) full_bdf="$hit" ;;
            *)      full_bdf="0000:$hit" ;;
        esac
        base="${full_bdf#0000:}"
        base="${base%.*}"

        # Entries on the GPU's own slot are handled by the caller's
        # primary cleanup, not by this helper.
        [[ "$base" != "$gpu_slot" ]] || continue

        # Candidates must be audio class (PCI class 04xx).
        pci_class=$(cat "/sys/bus/pci/devices/${full_bdf}/class" 2>/dev/null)
        pci_class="${pci_class#0x}"
        [[ "${pci_class:0:2}" == "04" ]] || continue

        # Leave alone any audio whose slot still hosts a display device
        # passed through to this VM.
        owned=0
        for sibling in "${gpu_slots[@]}"; do
            if [[ "$sibling" == "$base" ]]; then
                owned=1
                break
            fi
        done
        (( owned )) && continue

        label=$(lspci -nn -s "${full_bdf#0000:}" 2>/dev/null \
            | sed 's/^[^ ]* //' \
            | cut -c1-52)
        label="${label:-PCI audio device}"

        printf '%s|%s|%s\n' "$slot_idx" "$full_bdf" "$label"
    done < <(grep -E '^hostpci[0-9]+:' "$conf")
}
|
||||
|
||||
# Returns 0 if the given PCI BDF still appears as a hostpci passthrough
|
||||
# target in any VM config, optionally excluding one or more VM IDs.
|
||||
# Usage: _pci_bdf_in_any_vm <bdf> [excluded_vmid]...
|
||||
#
|
||||
# Used by the switch-mode cascade to decide whether a companion audio
|
||||
# device's vendor:device pair is safe to remove from /etc/modprobe.d/
|
||||
# vfio.conf (only if no other VM still references it).
|
||||
function _pci_bdf_in_any_vm() {
    # Return 0 when <bdf> is still a hostpci passthrough target in any VM
    # config under /etc/pve/qemu-server, 1 otherwise (or when <bdf> is
    # empty). Any further arguments are VM IDs whose configs are ignored.
    #
    # A hostpci line counts as a reference when it names:
    #   - the exact function           (0000:00:1f.3 or 00:1f.3), or
    #   - the whole slot, no function  (0000:00:1f or 00:1f), which hands
    #     every function of that slot to the VM.
    # The address may be followed by ',', ';' (multi-device value),
    # whitespace or end of line.
    local bdf="$1"; shift
    [[ -n "$bdf" ]] || return 1
    local short_bdf="${bdf#0000:}"

    # Build the ERE for the address. The function suffix is optional so
    # slot-only entries match, and its dot is escaped so it cannot match
    # an arbitrary character.
    local slot="${short_bdf%.*}"
    local target_re="$slot"
    if [[ "$short_bdf" == *.* ]]; then
        target_re="${slot}(\\.${short_bdf##*.})?"
    fi

    local conf vmid ex skip
    for conf in /etc/pve/qemu-server/*.conf; do
        # Unexpanded glob (no VM configs at all) fails this check.
        [[ -f "$conf" ]] || continue
        vmid=$(basename "$conf" .conf)
        skip=false
        for ex in "$@"; do
            if [[ "$vmid" == "$ex" ]]; then
                skip=true
                break
            fi
        done
        $skip && continue
        if grep -qE "^hostpci[0-9]+:.*(0000:)?${target_re}([,;[:space:]]|$)" "$conf" 2>/dev/null; then
            return 0
        fi
    done
    return 1
}
|
||||
|
||||
# Usage: _vm_remove_hostpci_index <vmid> <idx> [log_file]
# Deletes the hostpci<idx> entry of a VM by calling `qm set --delete`
# instead of editing the config file directly. All qm output (stdout and
# stderr) is appended to log_file, which defaults to $LOG_FILE when set
# and to /dev/null otherwise.
# Returns 1 if <vmid> or <idx> is empty; otherwise the exit code of qm set.
function _vm_remove_hostpci_index() {
    local vmid="$1" idx="$2"
    local log="${3:-${LOG_FILE:-/dev/null}}"

    if [[ -z "$vmid" || -z "$idx" ]]; then
        return 1
    fi

    qm set "$vmid" --delete "hostpci${idx}" >>"$log" 2>&1
}
|
||||
|
||||
# Robust LXC stop for switch-mode / passthrough flows.
|
||||
#
|
||||
# A plain `pct stop` can hang indefinitely when:
|
||||
# - the container has a stale lock from a previous aborted operation,
|
||||
# - processes inside the container (Plex, Jellyfin, databases) ignore
|
||||
# the initial TERM and sit in uninterruptible-sleep (D state) while
|
||||
# the GPU they were using is being yanked out,
|
||||
# - the host is under load and Proxmox's state polling stalls,
|
||||
# - `pct shutdown --timeout` is not always enforced by pct itself
|
||||
# (observed field reports of 5+ min waits despite --timeout 30).
|
||||
#
|
||||
# Strategy:
|
||||
# 1) return 0 immediately if the container is not running,
|
||||
# 2) clear any stale lock (most common cause of hangs),
|
||||
# 3) try `pct shutdown --forceStop 1 --timeout 30`, wrapped in an
|
||||
# external `timeout 45` as belt-and-braces in case pct itself
|
||||
# blocks on backend I/O,
|
||||
# 4) verify actual status via `pct status` — do not trust exit codes,
|
||||
# pct can return non-zero while the container is actually stopped,
|
||||
# 5) if still running, fall back to `pct stop` wrapped in `timeout 60`,
|
||||
# 6) verify again and return 1 if the container is truly stuck
|
||||
# (only happens when processes are in D state — requires manual
|
||||
# intervention, but the wizard moves on instead of hanging).
|
||||
#
|
||||
# Usage: _pmx_stop_lxc <ctid> [log_file]
|
||||
# log_file defaults to $LOG_FILE if set, otherwise /dev/null.
|
||||
# Returns 0 on stopped / already-stopped, non-zero if every attempt failed.
|
||||
function _pmx_stop_lxc() {
    # $1 = container ID, $2 = optional log file (falls back to $LOG_FILE,
    # then /dev/null). Returns 0 once `pct status` no longer reports the
    # container as running, 1 if it is still running after both attempts.
    local ctid="$1"
    local log="${2:-${LOG_FILE:-/dev/null}}"

    # Status probe: succeeds only while pct reports "status: running".
    # Exit codes of the stop commands below are deliberately ignored;
    # this probe is the single source of truth.
    _pmx_lxc_running() {
        pct status "$1" 2>/dev/null | grep -q "status: running"
    }

    # Nothing to do for a container that is not running.
    if ! _pmx_lxc_running "$ctid"; then
        return 0
    fi

    # Best-effort lock removal; a failure here (e.g. no lock present) is
    # not an error, so the result is discarded.
    pct unlock "$ctid" >>"$log" 2>&1 || true

    # Attempt 1: graceful shutdown that escalates to a forced stop after
    # 30 s inside pct, with an outer 45 s cap enforced by `timeout`.
    timeout 45 pct shutdown "$ctid" --forceStop 1 --timeout 30 >>"$log" 2>&1 || true
    sleep 1
    if ! _pmx_lxc_running "$ctid"; then
        return 0
    fi

    # Attempt 2: abrupt stop, capped at 60 s by `timeout`.
    timeout 60 pct stop "$ctid" >>"$log" 2>&1 || true
    sleep 1
    if _pmx_lxc_running "$ctid"; then
        # Still running after both attempts: report failure to the caller.
        return 1
    fi

    return 0
}
|
||||
|
||||
function _pci_next_hostpci_index() {
|
||||
local vmid="$1"
|
||||
local idx=0
|
||||
@@ -50,3 +249,109 @@ function _pci_function_assigned_to_vm() {
|
||||
|
||||
qm config "$vmid" 2>/dev/null | grep -qE "$pattern"
|
||||
}
|
||||
|
||||
# ==========================================================
|
||||
# SR-IOV detection helpers
|
||||
# ==========================================================
|
||||
# A PCI device participates in SR-IOV when either:
|
||||
# - It is a Physical Function (PF) with one or more active VFs
|
||||
# → /sys/bus/pci/devices/<BDF>/sriov_numvfs > 0
|
||||
# - It is a Virtual Function (VF) spawned by a PF
|
||||
# → /sys/bus/pci/devices/<BDF>/physfn is a symlink to the PF
|
||||
#
|
||||
# These helpers accept a BDF in either "0000:00:02.0" or "00:02.0" form.
|
||||
# Return 0 on match, non-zero otherwise (shell convention).
|
||||
|
||||
# Prints the PCI address in $1 in full "DDDD:BB:DD.F" form.
#   - An address that already starts with a 4-hex-digit domain followed by
#     ':' (any domain, not only 0000) is printed unchanged.
#   - Anything else is assumed to be a short "BB:DD.F" address and gets the
#     default "0000:" domain prepended.
# Returns 1 (printing nothing) when $1 is empty, 0 otherwise.
function _pci_normalize_bdf() {
    local id="$1"
    [[ -z "$id" ]] && return 1
    # A short BDF has its first ':' at offset 2, so it can never match a
    # 4-hex-digit domain prefix; only full addresses are left untouched.
    [[ "$id" =~ ^[0-9A-Fa-f]{4}: ]] || id="0000:${id}"
    printf '%s\n' "$id"
}
|
||||
|
||||
# Succeeds when the device in $1 has a "physfn" symlink in its sysfs
# directory, which is how a Virtual Function is recognised here.
# Returns 1 when the address cannot be normalised (empty input).
function _pci_is_vf() {
    local full_bdf
    if ! full_bdf=$(_pci_normalize_bdf "$1"); then
        return 1
    fi
    [[ -L "/sys/bus/pci/devices/${full_bdf}/physfn" ]]
}
|
||||
|
||||
# Prints the last path component of the resolved "physfn" symlink of the
# device in $1 (the parent PF's sysfs directory name).
# Returns 1 when the address is empty or the device has no physfn symlink.
function _pci_get_pf_of_vf() {
    local full_bdf physfn_link pf_path

    full_bdf=$(_pci_normalize_bdf "$1") || return 1
    physfn_link="/sys/bus/pci/devices/${full_bdf}/physfn"

    if [[ ! -L "$physfn_link" ]]; then
        return 1
    fi

    pf_path=$(readlink -f "$physfn_link")
    basename "$pf_path"
}
|
||||
|
||||
# Succeeds when the device in $1 exposes a sriov_totalvfs value greater
# than zero in sysfs. A missing or empty attribute counts as "not capable".
function _pci_is_sriov_capable() {
    local full_bdf vf_limit

    full_bdf=$(_pci_normalize_bdf "$1") || return 1
    vf_limit=$(cat "/sys/bus/pci/devices/${full_bdf}/sriov_totalvfs" 2>/dev/null)

    if [[ -z "$vf_limit" ]]; then
        return 1
    fi
    [[ "$vf_limit" -gt 0 ]]
}
|
||||
|
||||
# Prints the sriov_numvfs value of the device in $1, or 0 when the
# attribute is missing or empty.
# Prints 0 and returns 1 when the address cannot be normalised.
function _pci_active_vf_count() {
    local full_bdf vf_count

    if ! full_bdf=$(_pci_normalize_bdf "$1"); then
        echo 0
        return 1
    fi

    vf_count=$(cat "/sys/bus/pci/devices/${full_bdf}/sriov_numvfs" 2>/dev/null)
    echo "${vf_count:-0}"
}
|
||||
|
||||
# Succeeds when _pci_active_vf_count reports more than zero VFs for $1.
function _pci_has_active_vfs() {
    local active
    active=$(_pci_active_vf_count "$1")
    if [[ "$active" -gt 0 ]]; then
        return 0
    fi
    return 1
}
|
||||
|
||||
# Filter an array (by name) of PCI BDFs in place, removing entries that
|
||||
# are SR-IOV Virtual Functions or Physical Functions with active VFs —
|
||||
# i.e. the configurations ProxMenux refuses to operate on today.
|
||||
#
|
||||
# Usage: _pci_sriov_filter_array <array_name_by_ref>
|
||||
# Output: one line per removed entry, formatted "BDF|role" where role is
|
||||
# whatever _pci_sriov_role prints (e.g. "vf 0000:00:02.0" or
|
||||
# "pf-active 7"). The caller decides how to surface the removals.
|
||||
# Returns: 0 if the caller should continue (even if some entries were
|
||||
# filtered); the array mutation happens either way.
|
||||
function _pci_sriov_filter_array() {
    # $1 is the NAME of the caller's array; it is accessed through a
    # nameref and overwritten with the surviving entries at the end.
    local -n _arr_ref="$1"
    local -a _kept=()
    local bdf role first

    for bdf in "${_arr_ref[@]}"; do
        role=$(_pci_sriov_role "$bdf" 2>/dev/null)
        # Only the first word of the role decides; the rest is detail
        # (parent PF or VF count) that is passed through to the output.
        first="${role%% *}"
        case "$first" in
            vf|pf-active)
                # Dropped entry: report it as "BDF|role" on stdout.
                printf '%s|%s\n' "$bdf" "$role"
                ;;
            *)
                _kept+=("$bdf")
                ;;
        esac
    done

    _arr_ref=("${_kept[@]}")
}
|
||||
|
||||
# Emits a one-line SR-IOV role description for diagnostics/messages.
|
||||
# Prints one of:
|
||||
# "pf-active <N>" — PF with N>0 active VFs
|
||||
# "pf-idle" — SR-IOV capable PF with 0 VFs (benign)
|
||||
# "vf <PF-BDF>" — VF (names its parent PF)
|
||||
# "none" — device not involved in SR-IOV
|
||||
function _pci_sriov_role() {
    # Classifies the device in $1 and prints exactly one role line:
    #   "vf <PF>", "pf-active <N>", "pf-idle" or "none".
    # Always returns 0; an empty address is reported as "none".
    local full_bdf active

    if ! full_bdf=$(_pci_normalize_bdf "$1"); then
        echo "none"
        return 0
    fi

    if _pci_is_vf "$full_bdf"; then
        # The VF check comes first, before the SR-IOV capability check.
        echo "vf $(_pci_get_pf_of_vf "$full_bdf")"
    elif _pci_is_sriov_capable "$full_bdf"; then
        active=$(_pci_active_vf_count "$full_bdf")
        if [[ "$active" -gt 0 ]]; then
            echo "pf-active ${active}"
        else
            echo "pf-idle"
        fi
    else
        echo "none"
    fi
    return 0
}
|
||||
|
||||
@@ -28,6 +28,11 @@ NVIDIA_VID_DID=""
|
||||
if [[ -f "$UTILS_FILE" ]]; then
|
||||
source "$UTILS_FILE"
|
||||
fi
|
||||
if [[ -f "$LOCAL_SCRIPTS/global/pci_passthrough_helpers.sh" ]]; then
|
||||
source "$LOCAL_SCRIPTS/global/pci_passthrough_helpers.sh"
|
||||
elif [[ -f "$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)/global/pci_passthrough_helpers.sh" ]]; then
|
||||
source "$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)/global/pci_passthrough_helpers.sh"
|
||||
fi
|
||||
if [[ -f "$LOCAL_SCRIPTS/global/gpu_hook_guard_helpers.sh" ]]; then
|
||||
source "$LOCAL_SCRIPTS/global/gpu_hook_guard_helpers.sh"
|
||||
elif [[ -f "$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)/global/gpu_hook_guard_helpers.sh" ]]; then
|
||||
@@ -259,6 +264,67 @@ select_container() {
|
||||
# ============================================================
|
||||
# GPU checklist selection
|
||||
# ============================================================
|
||||
# ============================================================
|
||||
# SR-IOV guard — refuse to pass an SR-IOV GPU to an LXC via ProxMenux.
|
||||
# Although the LXC flow does not rewrite vfio.conf/blacklist (so it is
|
||||
# not destructive like add_gpu_vm.sh), it blindly globs /dev/dri/card*
|
||||
# and /dev/dri/renderD* without mapping each node to its BDF. With 7
|
||||
# VFs the container may end up holding any/all of them, which is not
|
||||
# the behavior a user asking for "one VF to this LXC" expects. Until a
|
||||
# VF-aware LXC flow exists, stop and point to manual configuration —
|
||||
# matching the policy used in switch_gpu_mode.sh and add_gpu_vm.sh.
|
||||
# ============================================================
|
||||
check_sriov_and_block_if_needed() {
    # Guard is a no-op when the SR-IOV helper was not sourced.
    if ! declare -F _pci_sriov_role >/dev/null 2>&1; then
        return 0
    fi

    local gpu_type pci role first_word
    local -a offenders=()

    # Map every selected GPU type to its PCI address and collect the ones
    # that are a VF or a PF with active VFs as "BDF|kind|detail" records.
    for gpu_type in "${SELECTED_GPUS[@]}"; do
        case "$gpu_type" in
            intel)  pci="$INTEL_PCI" ;;
            amd)    pci="$AMD_PCI" ;;
            nvidia) pci="$NVIDIA_PCI" ;;
            *)      continue ;;
        esac
        [[ -n "$pci" ]] || continue

        role=$(_pci_sriov_role "$pci")
        first_word="${role%% *}"
        if [[ "$first_word" == "vf" ]]; then
            offenders+=("${pci}|vf|${role#vf }")
        elif [[ "$first_word" == "pf-active" ]]; then
            offenders+=("${pci}|pf-active|${role#pf-active }")
        fi
    done

    # Nothing SR-IOV related selected: let the wizard continue.
    if (( ${#offenders[@]} == 0 )); then
        return 0
    fi

    # Build one bullet per offending device for the dialog body.
    local msg entry bdf kind info
    msg="\n\Zb\Z6$(translate 'SR-IOV Configuration Detected')\Zn\n\n"
    for entry in "${offenders[@]}"; do
        IFS='|' read -r bdf kind info <<< "$entry"
        if [[ "$kind" == "vf" ]]; then
            msg+=" • \Zb${bdf}\Zn — $(translate 'Virtual Function (parent PF:') ${info})\n"
        else
            msg+=" • \Zb${bdf}\Zn — $(translate 'Physical Function with') ${info} $(translate 'active VFs')\n"
        fi
    done
    msg+="\n$(translate 'To pass SR-IOV Virtual Functions to a container, edit the LXC configuration manually via the Proxmox web interface. The Physical Function will remain bound to the native driver.')"

    dialog --backtitle "ProxMenux" --colors \
        --title "$(translate 'SR-IOV Configuration Detected')" \
        --msgbox "$msg" 16 82

    # Blocking condition: terminate the script after the user has
    # acknowledged the message.
    exit 0
}
|
||||
|
||||
|
||||
select_gpus() {
|
||||
local gpu_items=()
|
||||
$HAS_INTEL && gpu_items+=("intel" "${INTEL_NAME:-Intel iGPU}" "off")
|
||||
@@ -927,6 +993,7 @@ main() {
|
||||
detect_host_gpus
|
||||
select_container
|
||||
select_gpus
|
||||
check_sriov_and_block_if_needed
|
||||
check_vfio_switch_mode
|
||||
precheck_existing_lxc_gpu_config
|
||||
|
||||
|
||||
+293
-30
@@ -71,6 +71,7 @@ SELECTED_GPU_NAME=""
|
||||
declare -a IOMMU_DEVICES=() # all PCI addrs in IOMMU group (endpoint devices)
|
||||
declare -a IOMMU_VFIO_IDS=() # vendor:device for vfio-pci ids=
|
||||
declare -a EXTRA_AUDIO_DEVICES=() # sibling audio function(s), typically *.1
|
||||
declare -a EXTRA_AUDIO_INFO=() # parallel to EXTRA_AUDIO_DEVICES — "BDF|current_driver" pairs for the summary dialog
|
||||
IOMMU_GROUP=""
|
||||
IOMMU_PENDING_REBOOT=false
|
||||
|
||||
@@ -212,28 +213,32 @@ _strip_colors() {
|
||||
printf '%s' "$1" | sed 's/\\Z[0-9a-zA-Z]//g'
|
||||
}
|
||||
|
||||
# Msgbox: dialog in standalone mode, whiptail in wizard mode.
# I/O pinned to /dev/tty so the dialog renders reliably regardless of
# how the caller redirected stdin/stdout, and immune to the SIGTTOU
# trap that fires when this script is resumed as a background job.
#
# Usage: _pmx_msgbox <title> <msg> [height] [width]
#   height defaults to 10, width to 72.
#   In wizard mode ($WIZARD_CALL == "true") the message is passed through
#   _strip_colors first, because only the dialog branch is invoked with
#   --colors.
_pmx_msgbox() {
    local title="$1" msg="$2" h="${3:-10}" w="${4:-72}"
    if [[ "$WIZARD_CALL" == "true" ]]; then
        whiptail --backtitle "ProxMenux" --title "$title" \
            --msgbox "$(_strip_colors "$msg")" "$h" "$w" < /dev/tty > /dev/tty
    else
        dialog --backtitle "ProxMenux" --colors \
            --title "$title" --msgbox "$msg" "$h" "$w" < /dev/tty > /dev/tty
    fi
}
|
||||
|
||||
# Yesno: dialog in standalone mode, whiptail in wizard mode.
# Returns 0 for yes, 1 for no (same as dialog/whiptail).
# I/O is pinned to /dev/tty so the prompt renders regardless of how the
# caller redirected stdin/stdout.
#
# Usage: _pmx_yesno <title> <msg> [height] [width]
#   height defaults to 10, width to 72.
#   In wizard mode ($WIZARD_CALL == "true") the message is passed through
#   _strip_colors first, because only the dialog branch is invoked with
#   --colors.
_pmx_yesno() {
    local title="$1" msg="$2" h="${3:-10}" w="${4:-72}"
    if [[ "$WIZARD_CALL" == "true" ]]; then
        whiptail --backtitle "ProxMenux" --title "$title" \
            --yesno "$(_strip_colors "$msg")" "$h" "$w" < /dev/tty > /dev/tty
    else
        dialog --backtitle "ProxMenux" --colors \
            --title "$title" --yesno "$msg" "$h" "$w" < /dev/tty > /dev/tty
    fi
    # $? here is the status of whichever dialog command ran in the if/else.
    return $?
}
|
||||
@@ -265,6 +270,27 @@ _pmx_menu() {
|
||||
return $?
|
||||
}
|
||||
|
||||
# Checklist: dialog in standalone mode, whiptail in wizard mode.
|
||||
# Usage: _pmx_checklist title msg h w list_h tag1 desc1 state1 tag2 desc2 state2 ...
|
||||
# state is "on" or "off". Returns the space-separated list of selected
|
||||
# tags on stdout (one line). Returns non-zero if the user cancels.
|
||||
_pmx_checklist() {
    # Positional layout: title, message, height, width, list height, then
    # the tag/description/state triplets that are forwarded untouched.
    local title="$1" msg="$2" h="$3" w="$4" lh="$5"
    shift 5

    local rc
    if [[ "$WIZARD_CALL" != "true" ]]; then
        # Standalone: dialog's result (written to stderr) is redirected to
        # our stdout while the UI itself is drawn on /dev/tty.
        dialog --backtitle "ProxMenux" --colors \
            --title "$title" \
            --checklist "$msg" "$h" "$w" "$lh" \
            "$@" 2>&1 >/dev/tty
        rc=$?
    else
        # Wizard: whiptail gets the colour-stripped message; fd 3 is used
        # to swap stdout and stderr so the selection ends up on stdout.
        whiptail --backtitle "ProxMenux" \
            --title "$title" \
            --checklist "$(_strip_colors "$msg")" "$h" "$w" "$lh" \
            "$@" 3>&1 1>&2 2>&3
        rc=$?
    fi
    return "$rc"
}
|
||||
|
||||
_file_has_exact_line() {
|
||||
local line="$1"
|
||||
local file="$2"
|
||||
@@ -718,6 +744,48 @@ select_gpu() {
|
||||
}
|
||||
|
||||
|
||||
# ==========================================================
|
||||
# SR-IOV guard — refuse to assign a Virtual Function or a Physical
|
||||
# Function with active VFs. Matches the policy in switch_gpu_mode.sh:
|
||||
# writing this GPU's vendor:device to /etc/modprobe.d/vfio.conf would
|
||||
# let vfio-pci claim the PF at next boot and destroy the whole VF
|
||||
# tree. ProxMenux does not yet manage SR-IOV lifecycle, so we stop
|
||||
# before touching vfio.conf / blacklist.conf.
|
||||
# ==========================================================
|
||||
check_sriov_and_block_if_needed() {
    # Skip silently when the helper is unavailable or no GPU is selected.
    if ! declare -F _pci_sriov_role >/dev/null 2>&1; then
        return 0
    fi
    if [[ -z "$SELECTED_GPU_PCI" ]]; then
        return 0
    fi

    local role first_word detail=""
    local parent_pf vf_total
    role=$(_pci_sriov_role "$SELECTED_GPU_PCI")
    first_word="${role%% *}"

    # Only "vf" and "pf-active" block the flow; every other role returns.
    if [[ "$first_word" == "vf" ]]; then
        parent_pf="${role#vf }"
        detail="$(translate 'The selected device') \Zb${SELECTED_GPU_PCI}\Zn $(translate 'is an SR-IOV Virtual Function (VF). Its parent Physical Function is') \Zb${parent_pf}\Zn."
    elif [[ "$first_word" == "pf-active" ]]; then
        vf_total="${role#pf-active }"
        detail="$(translate 'The selected device') \Zb${SELECTED_GPU_PCI}\Zn $(translate 'is a Physical Function with') \Zb${vf_total}\Zn $(translate 'active Virtual Functions. Changing its driver binding would destroy every VF.')"
    else
        return 0
    fi

    local msg
    msg="\n\Zb\Z6$(translate 'SR-IOV Configuration Detected')\Zn\n\n"
    msg+="${detail}\n\n"
    msg+="$(translate 'To assign VFs to VMs or LXCs, edit the configuration manually via the Proxmox web interface. The Physical Function will remain bound to the native driver.')"

    _pmx_msgbox "$(translate 'SR-IOV Configuration Detected')" "$msg" 16 82

    # In wizard mode record the outcome as "cancelled" before exiting.
    if [[ "$WIZARD_CALL" == "true" ]]; then
        _set_wizard_result "cancelled"
    fi
    exit 0
}
|
||||
|
||||
|
||||
# ==========================================================
|
||||
# Phase 1 — Step 4: Single-GPU warning
|
||||
# ==========================================================
|
||||
@@ -1067,30 +1135,39 @@ analyze_iommu_group() {
|
||||
|
||||
}
|
||||
|
||||
detect_optional_gpu_audio() {
|
||||
EXTRA_AUDIO_DEVICES=()
|
||||
|
||||
local sibling_audio="${SELECTED_GPU_PCI%.*}.1"
|
||||
local dev_path="/sys/bus/pci/devices/${sibling_audio}"
|
||||
[[ -d "$dev_path" ]] || return 0
|
||||
|
||||
# Returns 0 if the BDF at $1 is a real PCI audio device (class 04xx).
# Returns 1 when $1 is empty, when the device has no sysfs directory, or
# when its class code does not start with 04.
# $1 is used verbatim as the sysfs directory name, so it must be in the
# form sysfs uses (full address including the domain).
_pci_is_audio_device() {
    local bdf="$1"
    [[ -n "$bdf" ]] || return 1
    local dev_path="/sys/bus/pci/devices/${bdf}"
    [[ -d "$dev_path" ]] || return 1
    local class_hex
    # The class attribute is read with its "0x" prefix removed, so the first
    # two hex digits are the PCI base class.
    class_hex=$(cat "${dev_path}/class" 2>/dev/null | sed 's/^0x//')
    # The test result IS the return value; there must be no fallback
    # "return 0" here, otherwise every non-audio device would be accepted.
    [[ "${class_hex:0:2}" == "04" ]]
}
|
||||
|
||||
local already_in_group=false dev
|
||||
# Registers an audio BDF for passthrough alongside the GPU.
|
||||
# Idempotent: skips if the BDF was already recorded by analyze_iommu_group
|
||||
# (IOMMU_DEVICES) or by a previous call here (EXTRA_AUDIO_DEVICES).
|
||||
# Updates EXTRA_AUDIO_DEVICES, EXTRA_AUDIO_INFO, and IOMMU_VFIO_IDS.
|
||||
_register_gpu_audio_device() {
|
||||
local bdf="$1"
|
||||
[[ -n "$bdf" ]] || return 1
|
||||
local dev_path="/sys/bus/pci/devices/${bdf}"
|
||||
[[ -d "$dev_path" ]] || return 1
|
||||
|
||||
local dev
|
||||
for dev in "${IOMMU_DEVICES[@]}"; do
|
||||
if [[ "$dev" == "$sibling_audio" ]]; then
|
||||
already_in_group=true
|
||||
break
|
||||
fi
|
||||
[[ "$dev" == "$bdf" ]] && return 0
|
||||
done
|
||||
for dev in "${EXTRA_AUDIO_DEVICES[@]}"; do
|
||||
[[ "$dev" == "$bdf" ]] && return 0
|
||||
done
|
||||
|
||||
if [[ "$already_in_group" == "true" ]]; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
EXTRA_AUDIO_DEVICES+=("$sibling_audio")
|
||||
EXTRA_AUDIO_DEVICES+=("$bdf")
|
||||
local drv
|
||||
drv=$(_get_pci_driver "$bdf")
|
||||
EXTRA_AUDIO_INFO+=("${bdf}|${drv}")
|
||||
|
||||
local vid did new_id
|
||||
vid=$(cat "${dev_path}/vendor" 2>/dev/null | sed 's/0x//')
|
||||
@@ -1101,6 +1178,98 @@ detect_optional_gpu_audio() {
|
||||
IOMMU_VFIO_IDS+=("$new_id")
|
||||
fi
|
||||
fi
|
||||
return 0
|
||||
}
|
||||
|
||||
# Scans the host for all class-04 PCI audio devices and lets the user
|
||||
# pick which ones to pass to the VM. Only invoked when the selected GPU
|
||||
# has no .1 sibling audio function — the dGPU fast path continues to
|
||||
# auto-include that sibling without prompting.
|
||||
#
|
||||
# Devices already in the GPU's IOMMU group are excluded from the list
|
||||
# (analyze_iommu_group has already queued them). The checklist defaults
|
||||
# to all-OFF so nothing gets passed through silently.
|
||||
_prompt_user_for_audio_devices() {
    # Step 1: walk sysfs and keep every audio-class device that is not
    # already listed in IOMMU_DEVICES.
    local -a candidates=()
    local dev_path bdf dev skip
    for dev_path in /sys/bus/pci/devices/*; do
        [[ -d "$dev_path" ]] || continue
        bdf="${dev_path##*/}"
        _pci_is_audio_device "$bdf" || continue

        skip=false
        for dev in "${IOMMU_DEVICES[@]}"; do
            if [[ "$dev" == "$bdf" ]]; then
                skip=true
                break
            fi
        done
        [[ "$skip" == "true" ]] && continue

        candidates+=("$bdf")
    done

    # No eligible audio device on this host: nothing to ask.
    if (( ${#candidates[@]} == 0 )); then
        return 0
    fi

    # Step 2: one checklist row per candidate — tag is the BDF, the label
    # is the lspci name (slot and audio class tag stripped, cut to 52
    # characters) plus the current driver; every row starts unchecked.
    local -a items=()
    local name drv label
    for bdf in "${candidates[@]}"; do
        name=$(lspci -nn -s "${bdf#0000:}" 2>/dev/null \
            | sed -e 's/^[^ ]* //' \
                  -e 's/ \[0401\]//' -e 's/ \[0403\]//' -e 's/ \[0400\]//' \
            | cut -c1-52)
        if [[ -z "$name" ]]; then
            name="PCI audio"
        fi
        drv=$(_get_pci_driver "$bdf")
        label="${name} (driver: ${drv})"
        items+=("$bdf" "$label" "off")
    done

    local prompt selection dialog_h list_h
    prompt="$(translate 'The selected GPU has no dedicated .1 audio sibling function.')\n"
    prompt+="$(translate 'If you want HDMI/analog audio inside the VM, select the audio controller(s) to pass through along with the GPU.')\n\n"
    prompt+="$(translate 'Default is none (video-only passthrough). Use SPACE to toggle selections.')"

    # Step 3: size the dialog. The list area never shrinks below 4 rows;
    # the dialog adds 14 rows on top for prompt, borders and buttons.
    list_h=${#candidates[@]}
    if (( list_h < 4 )); then
        list_h=4
    fi
    dialog_h=$(( list_h + 14 ))

    # A cancelled checklist is treated as "no audio selected".
    selection=$(_pmx_checklist \
        "$(translate 'Add Audio Passthrough')" \
        "$prompt" \
        "$dialog_h" 82 "$list_h" \
        "${items[@]}") || return 0

    # dialog wraps selected tags in double quotes, whiptail does not;
    # strip them so both front-ends yield bare BDFs.
    selection="${selection//\"/}"
    if [[ -z "$selection" ]]; then
        return 0
    fi

    # Step 4: register every picked BDF. The unquoted expansion splits the
    # selection on whitespace on purpose.
    local picked
    for picked in $selection; do
        _register_gpu_audio_device "$picked"
    done
}
|
||||
|
||||
detect_optional_gpu_audio() {
    # Start from a clean slate on every call.
    EXTRA_AUDIO_DEVICES=()
    EXTRA_AUDIO_INFO=()

    # Candidate companion: function .1 of the selected GPU's own slot.
    local sibling_audio="${SELECTED_GPU_PCI%.*}.1"

    if ! _pci_is_audio_device "$sibling_audio"; then
        # No audio function at .1: hand the decision to the user through
        # the checklist instead of adding anything automatically. The
        # prompt's status becomes this function's status.
        _prompt_user_for_audio_devices
        return $?
    fi

    # Function .1 is an audio device: include it without prompting.
    _register_gpu_audio_device "$sibling_audio"
    return 0
}
|
||||
|
||||
|
||||
@@ -1375,8 +1544,19 @@ confirm_summary() {
|
||||
else
|
||||
msg+=" • $(translate 'hostpci entries for all IOMMU group devices')\n"
|
||||
fi
|
||||
[[ ${#EXTRA_AUDIO_DEVICES[@]} -gt 0 ]] && \
|
||||
msg+=" • $(translate 'Additional GPU audio function will be added'): ${EXTRA_AUDIO_DEVICES[*]}\n"
|
||||
if [[ ${#EXTRA_AUDIO_DEVICES[@]} -gt 0 ]]; then
|
||||
msg+=" • $(translate 'Additional audio function(s) to be added'):\n"
|
||||
local _audio_info _audio_bdf _audio_drv
|
||||
for _audio_info in "${EXTRA_AUDIO_INFO[@]}"; do
|
||||
_audio_bdf="${_audio_info%%|*}"
|
||||
_audio_drv="${_audio_info#*|}"
|
||||
if [[ -n "$_audio_drv" && "$_audio_drv" != "none" && "$_audio_drv" != "vfio-pci" ]]; then
|
||||
msg+=" • ${_audio_bdf} \Zb(${_audio_drv})\Zn\n"
|
||||
else
|
||||
msg+=" • ${_audio_bdf}\n"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
[[ "$SELECTED_GPU" == "nvidia" ]] && \
|
||||
msg+=" • $(translate 'NVIDIA KVM hiding (cpu hidden=1)')\n"
|
||||
if [[ "$SWITCH_FROM_LXC" == "true" ]]; then
|
||||
@@ -1698,7 +1878,7 @@ cleanup_lxc_configs() {
|
||||
[[ "$SWITCH_FROM_LXC" != "true" ]] && return 0
|
||||
[[ ${#LXC_AFFECTED_CTIDS[@]} -eq 0 ]] && return 0
|
||||
|
||||
msg_info "$(translate 'Applying selected LXC switch action...')"
|
||||
msg_info2 "$(translate 'Applying selected LXC switch action')"
|
||||
|
||||
local i
|
||||
for i in "${!LXC_AFFECTED_CTIDS[@]}"; do
|
||||
@@ -1708,7 +1888,11 @@ cleanup_lxc_configs() {
|
||||
|
||||
if [[ "${LXC_AFFECTED_RUNNING[$i]}" == "1" ]]; then
|
||||
msg_info "$(translate 'Stopping LXC') ${ctid}..."
|
||||
if pct stop "$ctid" >>"$LOG_FILE" 2>&1; then
|
||||
# _pmx_stop_lxc: graceful shutdown with forceStop+timeout, then
|
||||
# fallback to pct stop. Avoids the indefinite hang that raw
|
||||
# `pct stop` produces when the container is locked or has
|
||||
# unresponsive processes (Plex, databases, etc.).
|
||||
if _pmx_stop_lxc "$ctid" "$LOG_FILE"; then
|
||||
msg_ok "$(translate 'LXC stopped') ${ctid}" | tee -a "$screen_capture"
|
||||
else
|
||||
msg_warn "$(translate 'Could not stop LXC') ${ctid}" | tee -a "$screen_capture"
|
||||
@@ -1765,8 +1949,73 @@ cleanup_vm_config() {
|
||||
local src_conf="/etc/pve/qemu-server/${SWITCH_VM_SRC}.conf"
|
||||
if [[ -f "$src_conf" ]]; then
|
||||
msg_info "$(translate 'Removing GPU from VM') ${SWITCH_VM_SRC}..."
|
||||
sed -i "/^hostpci[0-9]\+:.*${pci_slot}/d" "$src_conf"
|
||||
# Precise regex: slot must be followed by ".<function>" and a
|
||||
# delimiter. Kept in sync with switch_gpu_mode.sh. A looser
|
||||
# ".*${pci_slot}" would match the slot as a substring and wipe
|
||||
# unrelated hostpci entries (e.g. slot "00:02" matching inside
|
||||
# a dGPU BDF 0000:02:00.0).
|
||||
sed -E -i "/^hostpci[0-9]+:[[:space:]]*(0000:)?${pci_slot}\.[0-7]([,[:space:]]|$)/d" "$src_conf"
|
||||
msg_ok "$(translate 'GPU removed from VM') ${SWITCH_VM_SRC}" | tee -a "$screen_capture"
|
||||
|
||||
# Cascade cleanup: detect audio companions orphaned in the
|
||||
# source VM after the GPU slot is removed. Typical case: the
|
||||
# source VM had an Intel iGPU at 00:02.0 paired with chipset
|
||||
# audio at 00:1f.3 via the Part 1 checklist — the sed above
|
||||
# only strips 00:02.* entries, leaving the chipset audio
|
||||
# hostpci pointing at a device the source VM no longer uses.
|
||||
#
|
||||
# Unlike switch_gpu_mode (detach flow), we deliberately do NOT
|
||||
# touch /etc/modprobe.d/vfio.conf here. The GPU is being moved
|
||||
# to the current target VM, which may select the same audio
|
||||
# companion in its own Part 1 checklist. Any vendor:device
|
||||
# orphaned in vfio.conf after this move is inert — the user
|
||||
# can clean it up later via switch_gpu_mode if they want.
|
||||
if declare -F _vm_list_orphan_audio_hostpci >/dev/null 2>&1; then
|
||||
local _orphan_audio
|
||||
_orphan_audio=$(_vm_list_orphan_audio_hostpci "$SWITCH_VM_SRC" "$pci_slot")
|
||||
if [[ -n "$_orphan_audio" ]]; then
|
||||
local -a _orph_items=()
|
||||
local _oline _o_idx _o_bdf _o_name
|
||||
while IFS= read -r _oline; do
|
||||
[[ -z "$_oline" ]] && continue
|
||||
_o_idx="${_oline%%|*}"
|
||||
_oline="${_oline#*|}"
|
||||
_o_bdf="${_oline%%|*}"
|
||||
_o_name="${_oline#*|}"
|
||||
_orph_items+=("$_o_idx" "${_o_bdf} ${_o_name}" "on")
|
||||
done <<< "$_orphan_audio"
|
||||
|
||||
local _prompt
|
||||
_prompt="\n$(translate 'The GPU has been moved out of VM') \Zb${SWITCH_VM_SRC}\Zn.\n\n"
|
||||
_prompt+="$(translate 'The source VM also has these audio devices, likely added together with the GPU. Remove them too?')\n\n"
|
||||
_prompt+="$(translate '(Checked entries will be removed. Uncheck to keep in VM.)')"
|
||||
|
||||
local _selected
|
||||
_selected=$(_pmx_checklist \
|
||||
"$(translate 'Associated Audio Devices')" \
|
||||
"$_prompt" \
|
||||
20 84 "$(( ${#_orph_items[@]} / 3 ))" \
|
||||
"${_orph_items[@]}") || _selected=""
|
||||
_selected=$(echo "$_selected" | tr -d '"')
|
||||
|
||||
local _sel _removed=""
|
||||
for _sel in $_selected; do
|
||||
if declare -F _vm_remove_hostpci_index >/dev/null 2>&1; then
|
||||
_vm_remove_hostpci_index "$SWITCH_VM_SRC" "$_sel" "$LOG_FILE" \
|
||||
&& _removed+=" hostpci${_sel}"
|
||||
else
|
||||
qm set "$SWITCH_VM_SRC" --delete "hostpci${_sel}" >>"$LOG_FILE" 2>&1 \
|
||||
&& _removed+=" hostpci${_sel}"
|
||||
fi
|
||||
done
|
||||
if [[ -n "$_removed" ]]; then
|
||||
show_proxmenux_logo
|
||||
msg_title "${run_title}"
|
||||
msg_ok "$(translate 'Associated audio removed from VM'): ${SWITCH_VM_SRC} —${_removed}" \
|
||||
| tee -a "$screen_capture"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
@@ -1922,6 +2171,7 @@ main() {
|
||||
detect_host_gpus
|
||||
check_iommu_enabled
|
||||
select_gpu
|
||||
check_sriov_and_block_if_needed
|
||||
warn_single_gpu
|
||||
select_vm
|
||||
ensure_selected_gpu_not_already_in_target_vm
|
||||
@@ -2025,10 +2275,23 @@ main() {
|
||||
|
||||
rm -f "$screen_capture"
|
||||
|
||||
# Final reboot prompt. Whiptail is invoked directly (not through
|
||||
# the _pmx_yesno helper) because the ProxMenux menu chain
|
||||
# (menu → main_menu → hw_grafics_menu → add_gpu_vm) has been
|
||||
# verified to work reliably with a bare whiptail here, while the
|
||||
# dialog-based helper path hits process-group / TTY edge cases in
|
||||
# that exact chain.
|
||||
#
|
||||
# The extra `Press Enter to continue ... read -r` between whiptail
|
||||
# and `reboot` is deliberate — it gives the user a visible pause
|
||||
# after the dialog closes so an accidental Enter on the yes button
|
||||
# cannot trigger an immediate reboot.
|
||||
if [[ "$HOST_CONFIG_CHANGED" == "true" ]]; then
|
||||
whiptail --title "$(translate 'Reboot Required')" \
|
||||
--yesno "$(translate 'A reboot is required for VFIO binding to take effect. Do you want to restart now?')" 10 68
|
||||
if [[ $? -eq 0 ]]; then
|
||||
msg_success "$(translate 'Press Enter to continue...')"
|
||||
read -r
|
||||
msg_warn "$(translate 'Rebooting the system...')"
|
||||
reboot
|
||||
else
|
||||
|
||||
@@ -95,7 +95,7 @@ detect_driver_status() {
|
||||
CURRENT_DRIVER_VERSION=""
|
||||
|
||||
# First check if nvidia kernel module is actually loaded
|
||||
if lsmod | grep -q "^nvidia "; then
|
||||
if grep -q "^nvidia " /proc/modules 2>/dev/null; then
|
||||
|
||||
modprobe nvidia-uvm 2>/dev/null || true
|
||||
sleep 1
|
||||
@@ -273,7 +273,7 @@ update_lxc_nvidia() {
|
||||
free_mb=$(pct exec "$ctid" -- df -m / 2>/dev/null | awk 'NR==2{print $4}' || echo 0)
|
||||
if [[ "$free_mb" -lt 1500 ]]; then
|
||||
_restore_container_memory "$ctid"
|
||||
dialog --backtitle "ProxMenux" \
|
||||
whiptail --backtitle "ProxMenux" \
|
||||
--title "$(translate 'Insufficient Disk Space')" \
|
||||
--msgbox "\n$(translate 'Container') ${ctid} $(translate 'has only') ${free_mb}MB $(translate 'of free disk space.')\n\n$(translate 'NVIDIA libs require approximately 1.5GB of free space.')" \
|
||||
11 72
|
||||
@@ -381,7 +381,7 @@ offer_lxc_updates_if_any() {
|
||||
done
|
||||
info+="\n$(translate 'Do you want to update the NVIDIA userspace libraries inside these containers to match the host?')"
|
||||
|
||||
if ! hybrid_yesno "$(translate 'Update NVIDIA in LXC Containers')" "$info" 20 80; then
|
||||
if ! hybrid_whiptail_yesno "$(translate 'Update NVIDIA in LXC Containers')" "$info" 20 80; then
|
||||
msg_info2 "$(translate 'LXC update skipped by user.')"
|
||||
return 0
|
||||
fi
|
||||
@@ -427,12 +427,14 @@ options nouveau modeset=0
|
||||
EOF
|
||||
|
||||
# Attempt to unload nouveau if currently loaded
|
||||
if lsmod | grep -q "^nouveau "; then
|
||||
if grep -q "^nouveau " /proc/modules 2>/dev/null; then
|
||||
|
||||
msg_info "$(translate 'Nouveau module is loaded, attempting to unload...')"
|
||||
modprobe -r nouveau 2>/dev/null || true
|
||||
sleep 1
|
||||
|
||||
# Check if unload succeeded
|
||||
if lsmod | grep -q "^nouveau "; then
|
||||
if grep -q "^nouveau " /proc/modules 2>/dev/null; then
|
||||
NOUVEAU_STILL_LOADED=true
|
||||
msg_warn "$(translate 'Could not unload nouveau module (may be in use). The blacklist will take effect after reboot. Installation will continue but a reboot will be required.')"
|
||||
echo "WARNING: nouveau module still loaded after unload attempt" >> "$LOG_FILE"
|
||||
@@ -444,6 +446,7 @@ EOF
|
||||
NOUVEAU_STILL_LOADED=false
|
||||
msg_ok "$(translate 'nouveau driver has been blacklisted.')" | tee -a "$screen_capture"
|
||||
fi
|
||||
|
||||
}
|
||||
|
||||
ensure_modules_config() {
|
||||
@@ -487,7 +490,7 @@ stop_and_disable_nvidia_services() {
|
||||
systemctl disable "$service" >/dev/null 2>&1 || true
|
||||
fi
|
||||
done
|
||||
|
||||
|
||||
sleep 2
|
||||
|
||||
msg_ok "$(translate 'NVIDIA services stopped and disabled.')" | tee -a "$screen_capture"
|
||||
@@ -495,41 +498,45 @@ stop_and_disable_nvidia_services() {
|
||||
}
|
||||
|
||||
unload_nvidia_modules() {
|
||||
msg_info "$(translate 'Unloading NVIDIA kernel modules...')"
|
||||
|
||||
for mod in nvidia_uvm nvidia_drm nvidia_modeset nvidia; do
|
||||
modprobe -r "$mod" >/dev/null 2>&1 || true
|
||||
done
|
||||
|
||||
# Give the kernel a moment to finalize sysfs teardown before re-checking.
|
||||
# Reading /proc/modules directly (instead of lsmod) avoids the
|
||||
# "could not open /sys/module/<mod>/holders" race when a module has just
|
||||
# been removed from /proc/modules but its sysfs dir hasn't been reaped yet.
|
||||
sleep 1
|
||||
|
||||
if lsmod | grep -qi '\bnvidia'; then
|
||||
if grep -q "^nvidia" /proc/modules 2>/dev/null; then
|
||||
for mod in nvidia_uvm nvidia_drm nvidia_modeset nvidia; do
|
||||
modprobe -r --force "$mod" >/dev/null 2>&1 || true
|
||||
done
|
||||
sleep 1
|
||||
fi
|
||||
|
||||
if lsmod | grep -qi '\bnvidia'; then
|
||||
msg_warn "$(translate 'Some NVIDIA modules could not be unloaded. Installation may fail. Ensure no processes are using the GPU.')"
|
||||
if grep -q "^nvidia" /proc/modules 2>/dev/null; then
|
||||
|
||||
if command -v lsof >/dev/null 2>&1; then
|
||||
echo "$(translate 'Processes using NVIDIA:'):" >> "$LOG_FILE"
|
||||
lsof /dev/nvidia* 2>/dev/null >> "$LOG_FILE" || true
|
||||
fi
|
||||
else
|
||||
|
||||
msg_ok "$(translate 'NVIDIA kernel modules unloaded successfully.')" | tee -a "$screen_capture"
|
||||
fi
|
||||
}
|
||||
|
||||
complete_nvidia_uninstall() {
|
||||
msg_info "$(translate 'Completing NVIDIA uninstallation...')"
|
||||
stop_and_disable_nvidia_services
|
||||
unload_nvidia_modules
|
||||
|
||||
if command -v nvidia-uninstall >/dev/null 2>&1; then
|
||||
#msg_info "$(translate 'Running NVIDIA uninstaller...')"
|
||||
msg_info "$(translate 'Running NVIDIA uninstaller...')"
|
||||
nvidia-uninstall --silent >>"$LOG_FILE" 2>&1 || true
|
||||
msg_ok "$(translate 'NVIDIA uninstaller completed.')"
|
||||
fi
|
||||
|
||||
msg_ok "$(translate 'NVIDIA uninstallation steps completed.')" | tee -a "$screen_capture"
|
||||
cleanup_nvidia_dkms
|
||||
|
||||
msg_info "$(translate 'Removing NVIDIA packages...')"
|
||||
@@ -546,10 +553,11 @@ complete_nvidia_uninstall() {
|
||||
find "$NVIDIA_WORKDIR" -type d -name "nvidia-persistenced" -exec rm -rf {} + 2>/dev/null || true
|
||||
find "$NVIDIA_WORKDIR" -type d -name "nvidia-patch" -exec rm -rf {} + 2>/dev/null || true
|
||||
fi
|
||||
|
||||
|
||||
update_component_status "nvidia_driver" "removed" "" "gpu" '{}'
|
||||
|
||||
msg_ok "$(translate 'Complete NVIDIA uninstallation finished.')" | tee -a "$screen_capture"
|
||||
|
||||
}
|
||||
|
||||
cleanup_nvidia_dkms() {
|
||||
@@ -785,7 +793,7 @@ download_nvidia_installer() {
|
||||
return 0
|
||||
else
|
||||
echo "Existing file FAILED integrity check, removing..." >> "$LOG_FILE"
|
||||
msg_warn "$(translate 'Existing file failed verification, re-downloading...')" >&2
|
||||
msg_warn "$(translate 'Existing file, re-downloading...')" >&2
|
||||
rm -f "$run_file"
|
||||
fi
|
||||
else
|
||||
@@ -916,7 +924,8 @@ run_nvidia_installer() {
|
||||
update-initramfs -u -k all >>"$LOG_FILE" 2>&1 || true
|
||||
# Try one more time to unload nouveau after initramfs rebuild
|
||||
modprobe -r nouveau 2>/dev/null || true
|
||||
if lsmod | grep -q "^nouveau "; then
|
||||
sleep 1
|
||||
if grep -q "^nouveau " /proc/modules 2>/dev/null; then
|
||||
echo "WARNING: nouveau still loaded after initramfs rebuild, proceeding with --no-nouveau-check" >> "$LOG_FILE"
|
||||
msg_warn "$(translate 'nouveau still active. Proceeding with installation. A reboot will be required for the driver to work.')"
|
||||
else
|
||||
@@ -1226,7 +1235,7 @@ main() {
|
||||
|
||||
show_proxmenux_logo
|
||||
msg_title "$(translate "$SCRIPT_TITLE")"
|
||||
msg_info2 "$(translate 'Uninstalling current NVIDIA driver before installing new version...')"
|
||||
msg_info2 "$(translate 'Uninstalling current NVIDIA driver before installing new version')"
|
||||
complete_nvidia_uninstall
|
||||
|
||||
sleep 2
|
||||
@@ -1336,4 +1345,4 @@ main() {
|
||||
|
||||
if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then
|
||||
main
|
||||
fi
|
||||
fi
|
||||
|
||||
@@ -624,6 +624,75 @@ select_gpus() {
|
||||
read -ra SELECTED_GPU_IDX <<< "$sel"
|
||||
}
|
||||
|
||||
# ==========================================================
|
||||
# SR-IOV guard — abort mode switch when SR-IOV is active
|
||||
# ==========================================================
|
||||
# Intel i915-sriov-dkms and AMD MxGPU split a Physical Function (PF) into
|
||||
# multiple Virtual Functions (VFs). Switching the PF's driver destroys
|
||||
# every VF; switching a VF's driver affects only that VF. ProxMenux does
|
||||
# not yet manage the SR-IOV lifecycle (create/destroy VFs, track per-VF
|
||||
# ownership), so operating on a PF with active VFs — or on a VF itself —
|
||||
# would leave the user's virtualization stack in an inconsistent state.
|
||||
# We detect the situation early and hand the user back to the Proxmox
|
||||
# web UI, which understands VFs as first-class PCI devices.
|
||||
check_sriov_and_block_if_needed() {
    # Interactive SR-IOV guard: inspects every GPU selected in
    # SELECTED_GPU_IDX and, if any of them is reported as an SR-IOV Virtual
    # Function or as a Physical Function with active VFs, shows a dialog
    # msgbox listing them and terminates the script with exit status 0.
    #
    # Globals read: SELECTED_GPU_IDX (indices), ALL_GPU_PCIS (BDF per index).
    # Returns 0 without any UI when the _pci_sriov_role helper is not
    # defined or when no selected device is a VF / active PF.
    #
    # NOTE(review): the parsing below presumes _pci_sriov_role prints
    # "vf <parent-bdf>" or "pf-active <count>" — confirm against the helper.
    if ! declare -F _pci_sriov_role >/dev/null 2>&1; then
        return 0
    fi

    local gpu_idx gpu_bdf sriov_role
    local -a vf_entries=()
    local -a pf_entries=()

    # Classify each selected GPU; entries are stored as "<bdf>|<detail>".
    for gpu_idx in "${SELECTED_GPU_IDX[@]}"; do
        gpu_bdf="${ALL_GPU_PCIS[$gpu_idx]}"
        sriov_role=$(_pci_sriov_role "$gpu_bdf")
        if [[ "${sriov_role%% *}" == "vf" ]]; then
            vf_entries+=("${gpu_bdf}|${sriov_role#vf }")
        elif [[ "${sriov_role%% *}" == "pf-active" ]]; then
            pf_entries+=("${gpu_bdf}|${sriov_role#pf-active }")
        fi
    done

    # Nothing SR-IOV related among the selection: let the caller continue.
    if (( ${#vf_entries[@]} == 0 && ${#pf_entries[@]} == 0 )); then
        return 0
    fi

    local dlg_title dlg_body item
    dlg_title="$(translate 'SR-IOV Configuration Detected')"
    dlg_body="\n"

    if (( ${#vf_entries[@]} > 0 )); then
        dlg_body+="$(translate 'The following selected device(s) are SR-IOV Virtual Functions (VFs):')\n\n"
        for item in "${vf_entries[@]}"; do
            # item is "<vf-bdf>|<parent-pf-bdf>"
            dlg_body+=" • ${item%%|*} $(translate '(parent PF:') ${item#*|})\n"
        done
        dlg_body+="\n"
    fi

    if (( ${#pf_entries[@]} > 0 )); then
        dlg_body+="$(translate 'The following selected device(s) are Physical Functions with active Virtual Functions:')\n\n"
        for item in "${pf_entries[@]}"; do
            # item is "<pf-bdf>|<active-vf-count>"
            dlg_body+=" • ${item%%|*} — ${item#*|} $(translate 'active VF(s)')\n"
        done
        dlg_body+="\n"
    fi

    dlg_body+="$(translate 'To assign VFs to VMs or LXCs, edit the configuration manually via the Proxmox web interface. The Physical Function will remain bound to the native driver.')"

    dialog --backtitle "ProxMenux" \
        --title "$dlg_title" \
        --msgbox "$dlg_body" 20 80

    # Blocked: end the whole script (not just this function) after the notice.
    exit 0
}
|
||||
|
||||
collect_selected_iommu_ids() {
|
||||
SELECTED_IOMMU_IDS=()
|
||||
SELECTED_PCI_SLOTS=()
|
||||
@@ -766,8 +835,14 @@ apply_lxc_action_for_vm_mode() {
|
||||
|
||||
if [[ "${LXC_AFFECTED_RUNNING[$i]}" == "1" ]]; then
|
||||
msg_info "$(translate 'Stopping LXC') ${ctid}..."
|
||||
pct stop "$ctid" >>"$LOG_FILE" 2>&1 || true
|
||||
msg_ok "$(translate 'LXC stopped') ${ctid}" | tee -a "$screen_capture"
|
||||
# _pmx_stop_lxc: unlock + graceful shutdown with forceStop+timeout,
|
||||
# fallback to pct stop. Prevents the indefinite hang that raw
|
||||
# `pct stop` triggers on locked / stuck containers.
|
||||
if _pmx_stop_lxc "$ctid" "$LOG_FILE"; then
|
||||
msg_ok "$(translate 'LXC stopped') ${ctid}" | tee -a "$screen_capture"
|
||||
else
|
||||
msg_warn "$(translate 'Could not stop LXC') ${ctid}" | tee -a "$screen_capture"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ "$LXC_ACTION" == "keep_gpu_disable_onboot" && "${LXC_AFFECTED_ONBOOT[$i]}" == "1" ]]; then
|
||||
@@ -879,11 +954,102 @@ apply_vm_action_for_lxc_mode() {
|
||||
fi
|
||||
|
||||
if [[ "$VM_ACTION" == "remove_gpu_keep_onboot" && -f "$conf" ]]; then
|
||||
# Primary cleanup: strip hostpci lines whose BDF matches any of
|
||||
# the GPU's selected slots. Matches both the PF function (.0) and
|
||||
# any sibling audio or HDMI codec that shares the slot (typical
|
||||
# for discrete NVIDIA/AMD cards where .1 is the HDMI audio).
|
||||
#
|
||||
# Precise regex: the slot must be followed by ".<function>" and
|
||||
# either a delimiter or end-of-line. A looser ".*${slot}" would
|
||||
# match by pure substring and delete unrelated hostpci entries —
|
||||
# e.g. slot "00:02" would match inside "0000:02:00.0" (a dGPU at
|
||||
# 02:00) and wipe both the iGPU and the unrelated dGPU.
|
||||
local slot
|
||||
for slot in "${SELECTED_PCI_SLOTS[@]}"; do
|
||||
sed -i "/^hostpci[0-9]\+:.*${slot}/d" "$conf"
|
||||
sed -E -i "/^hostpci[0-9]+:[[:space:]]*(0000:)?${slot}\.[0-7]([,[:space:]]|$)/d" "$conf"
|
||||
done
|
||||
msg_ok "$(translate 'GPU removed from VM config') ${vmid}" | tee -a "$screen_capture"
|
||||
|
||||
# Cascade cleanup: Intel iGPU passthrough typically pairs the GPU
|
||||
# at 00:02.0 with chipset audio at 00:1f.3, which lives at a
|
||||
# different slot and therefore survives the sed above. If it
|
||||
# stays in the VM config after the GPU is gone, the VM either
|
||||
# fails to start (vfio-pci no longer claims 8086:51c8 after the
|
||||
# switch-back) or it steals host audio unnecessarily. Enumerate
|
||||
# orphan audio hostpci entries and ask the user what to do.
|
||||
if declare -F _vm_list_orphan_audio_hostpci >/dev/null 2>&1; then
|
||||
local _orphan_audio
|
||||
_orphan_audio=$(_vm_list_orphan_audio_hostpci "$vmid" "${SELECTED_PCI_SLOTS[0]}")
|
||||
if [[ -n "$_orphan_audio" ]]; then
|
||||
local -a _orph_items=()
|
||||
local _line _o_idx _o_bdf _o_name
|
||||
while IFS= read -r _line; do
|
||||
[[ -z "$_line" ]] && continue
|
||||
_o_idx="${_line%%|*}"
|
||||
_line="${_line#*|}"
|
||||
_o_bdf="${_line%%|*}"
|
||||
_o_name="${_line#*|}"
|
||||
_orph_items+=("$_o_idx" "${_o_bdf} ${_o_name}" "on")
|
||||
done <<< "$_orphan_audio"
|
||||
|
||||
local _prompt _selected
|
||||
_prompt="\n$(translate 'The GPU is being detached from VM') \Zb${vmid}\Zn.\n\n"
|
||||
_prompt+="$(translate 'The VM also has these audio devices assigned via PCI passthrough — typically added together with the GPU. Remove them too?')\n\n"
|
||||
_prompt+="$(translate '(Checked entries will be removed. Uncheck to keep in VM.)')"
|
||||
|
||||
_selected=$(dialog --backtitle "ProxMenux" --colors \
|
||||
--title "$(translate 'Associated Audio Devices')" \
|
||||
--checklist "$_prompt" 20 84 "$(( ${#_orph_items[@]} / 3 ))" \
|
||||
"${_orph_items[@]}" \
|
||||
2>&1 >/dev/tty) || _selected=""
|
||||
_selected=$(echo "$_selected" | tr -d '"')
|
||||
|
||||
# Cross-reference table so we can recover each selected idx's
|
||||
# original BDF (we need it for vendor:device lookup below).
|
||||
declare -A _orphan_bdf_by_idx=()
|
||||
local _o_line _o_i _o_b
|
||||
while IFS= read -r _o_line; do
|
||||
[[ -z "$_o_line" ]] && continue
|
||||
_o_i="${_o_line%%|*}"
|
||||
_o_line="${_o_line#*|}"
|
||||
_o_b="${_o_line%%|*}"
|
||||
_orphan_bdf_by_idx["$_o_i"]="$_o_b"
|
||||
done <<< "$_orphan_audio"
|
||||
|
||||
local _sel _removed_audio="" _rem_bdf _vd_hex _dd_hex _vd_id
|
||||
for _sel in $_selected; do
|
||||
_rem_bdf="${_orphan_bdf_by_idx[$_sel]:-}"
|
||||
if _vm_remove_hostpci_index "$vmid" "$_sel" "$LOG_FILE"; then
|
||||
_removed_audio+=" hostpci${_sel}"
|
||||
|
||||
# Fix B: if the removed audio BDF is not referenced by any
|
||||
# OTHER VM, its vendor:device can safely come out of
|
||||
# /etc/modprobe.d/vfio.conf too. Without this step,
|
||||
# SELECTED_IOMMU_IDS only held the GPU's own IOMMU group
|
||||
# (e.g. 8086:46a3 for Intel iGPU) and the companion audio
|
||||
# id (e.g. 8086:51c8 for chipset audio) survived in
|
||||
# vfio.conf, so vfio-pci kept claiming it at next boot
|
||||
# even though nothing used it.
|
||||
[[ -z "$_rem_bdf" ]] && continue
|
||||
if ! _pci_bdf_in_any_vm "$_rem_bdf" "${VM_AFFECTED_IDS[@]}"; then
|
||||
_vd_hex=$(cat "/sys/bus/pci/devices/${_rem_bdf}/vendor" 2>/dev/null | sed 's/^0x//')
|
||||
_dd_hex=$(cat "/sys/bus/pci/devices/${_rem_bdf}/device" 2>/dev/null | sed 's/^0x//')
|
||||
if [[ -n "$_vd_hex" && -n "$_dd_hex" ]]; then
|
||||
_vd_id="${_vd_hex}:${_dd_hex}"
|
||||
if ! _contains_in_array "$_vd_id" "${SELECTED_IOMMU_IDS[@]}"; then
|
||||
SELECTED_IOMMU_IDS+=("$_vd_id")
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
done
|
||||
unset _orphan_bdf_by_idx
|
||||
if [[ -n "$_removed_audio" ]]; then
|
||||
msg_ok "$(translate 'Associated audio removed from VM'): ${_removed_audio# }" \
|
||||
| tee -a "$screen_capture"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
done
|
||||
}
|
||||
@@ -1164,6 +1330,7 @@ main() {
|
||||
detect_host_gpus
|
||||
while true; do
|
||||
select_gpus
|
||||
check_sriov_and_block_if_needed
|
||||
select_target_mode
|
||||
[[ $? -eq 2 ]] && continue
|
||||
validate_vm_mode_blocked_ids
|
||||
|
||||
@@ -507,6 +507,67 @@ find_gpu_by_slot() {
|
||||
return 1
|
||||
}
|
||||
|
||||
# ==========================================================
|
||||
# SR-IOV guard — abort mode switch when SR-IOV is active
|
||||
# ==========================================================
|
||||
# Same policy as the interactive switch_gpu_mode.sh: refuse to operate on
|
||||
# a Virtual Function or on a Physical Function that already has active
|
||||
# VFs, since flipping drivers in that state collapses the VF tree and
|
||||
# breaks every guest that was consuming a VF.
|
||||
check_sriov_and_block_if_needed() {
    # SR-IOV guard for the hybrid/web flow: scans the GPUs referenced by
    # SELECTED_GPU_IDX and, when any is reported as an SR-IOV Virtual
    # Function or as a Physical Function with active VFs, shows an HTML
    # summary through hybrid_msgbox and returns 1.
    #
    # Globals read: SELECTED_GPU_IDX (indices), ALL_GPU_PCIS (BDF per index).
    # Returns: 0 when the _pci_sriov_role helper is not defined or nothing
    #          is flagged; 1 after the notice has been shown.
    #
    # NOTE(review): presumes _pci_sriov_role prints "vf <parent-bdf>" or
    # "pf-active <count>" — confirm against the helper's definition.
    if ! declare -F _pci_sriov_role >/dev/null 2>&1; then
        return 0
    fi

    local sel device_bdf reported
    local -a flagged_vfs=()
    local -a flagged_pfs=()

    # Bucket each selected device as "<bdf>|<detail>" by its reported role.
    for sel in "${SELECTED_GPU_IDX[@]}"; do
        device_bdf="${ALL_GPU_PCIS[$sel]}"
        reported=$(_pci_sriov_role "$device_bdf")
        case "${reported%% *}" in
            vf)        flagged_vfs+=("${device_bdf}|${reported#vf }") ;;
            pf-active) flagged_pfs+=("${device_bdf}|${reported#pf-active }") ;;
        esac
    done

    if [[ ${#flagged_vfs[@]} -eq 0 ]] && [[ ${#flagged_pfs[@]} -eq 0 ]]; then
        return 0
    fi

    local html row
    html="<div style='color:#f0ad4e;font-weight:bold;margin-bottom:10px;'>$(translate 'SR-IOV Configuration Detected')</div>"

    if [[ ${#flagged_vfs[@]} -ne 0 ]]; then
        html+="<p>$(translate 'The following selected device(s) are SR-IOV Virtual Functions (VFs):')</p><ul>"
        for row in "${flagged_vfs[@]}"; do
            # row is "<vf-bdf>|<parent-pf-bdf>"
            html+="<li><code>${row%%|*}</code> — $(translate 'parent PF:') <code>${row#*|}</code></li>"
        done
        html+="</ul>"
    fi

    if [[ ${#flagged_pfs[@]} -ne 0 ]]; then
        html+="<p>$(translate 'The following selected device(s) are Physical Functions with active Virtual Functions:')</p><ul>"
        for row in "${flagged_pfs[@]}"; do
            # row is "<pf-bdf>|<active-vf-count>"
            html+="<li><code>${row%%|*}</code> — ${row#*|} $(translate 'active VF(s)')</li>"
        done
        html+="</ul>"
    fi

    html+="<p>$(translate 'To assign VFs to VMs or LXCs, edit the configuration manually via the Proxmox web interface. The Physical Function will remain bound to the native driver.')</p>"

    hybrid_msgbox "$(translate 'SR-IOV Configuration Detected')" "$html"

    # Non-zero tells the caller the selection was blocked.
    return 1
}
|
||||
|
||||
validate_vm_mode_blocked_ids() {
|
||||
[[ "$TARGET_MODE" != "vm" ]] && return 0
|
||||
|
||||
@@ -687,8 +748,14 @@ apply_lxc_action_for_vm_mode() {
|
||||
|
||||
if [[ "${LXC_AFFECTED_RUNNING[$i]}" == "1" ]]; then
|
||||
msg_info "$(translate 'Stopping LXC') ${ctid}..."
|
||||
pct stop "$ctid" >>"$LOG_FILE" 2>&1 || true
|
||||
msg_ok "$(translate 'LXC stopped') ${ctid}" | tee -a "$screen_capture"
|
||||
# _pmx_stop_lxc: unlock + graceful shutdown with forceStop+timeout,
|
||||
# fallback to pct stop. Prevents the indefinite hang that raw
|
||||
# `pct stop` triggers on locked / stuck containers.
|
||||
if _pmx_stop_lxc "$ctid" "$LOG_FILE"; then
|
||||
msg_ok "$(translate 'LXC stopped') ${ctid}" | tee -a "$screen_capture"
|
||||
else
|
||||
msg_warn "$(translate 'Could not stop LXC') ${ctid}" | tee -a "$screen_capture"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ "$LXC_ACTION" == "keep_gpu_disable_onboot" && "${LXC_AFFECTED_ONBOOT[$i]}" == "1" ]]; then
|
||||
@@ -804,11 +871,67 @@ apply_vm_action_for_lxc_mode() {
|
||||
fi
|
||||
|
||||
if [[ "$VM_ACTION" == "remove_gpu_keep_onboot" && -f "$conf" ]]; then
|
||||
# Primary cleanup: strip hostpci lines whose BDF matches any of
|
||||
# the GPU's selected slots. Matches both the PF function (.0) and
|
||||
# sibling audio/HDMI codecs (.1, typical for discrete cards).
|
||||
#
|
||||
# Precise regex: the slot must be followed by ".<function>" and a
|
||||
# delimiter. Kept in sync with switch_gpu_mode.sh — a looser
|
||||
# substring match would wipe unrelated hostpci entries (e.g. slot
|
||||
# "00:02" matching as a substring inside a dGPU BDF 0000:02:00.0).
|
||||
local slot
|
||||
for slot in "${SELECTED_PCI_SLOTS[@]}"; do
|
||||
sed -i "/^hostpci[0-9]\+:.*${slot}/d" "$conf"
|
||||
sed -E -i "/^hostpci[0-9]+:[[:space:]]*(0000:)?${slot}\.[0-7]([,[:space:]]|$)/d" "$conf"
|
||||
done
|
||||
msg_ok "$(translate 'GPU removed from VM config') ${vmid}" | tee -a "$screen_capture"
|
||||
|
||||
# Cascade cleanup for the web flow: auto-remove any PCI audio
|
||||
# hostpci entries at a slot DIFFERENT from the GPU (typical Intel
|
||||
# iGPU case where 00:1f.3 chipset audio was paired with the iGPU
|
||||
# at 00:02.0). The helper skips audio devices whose slot already
|
||||
# has a display sibling in the same VM (HDMI codec of another
|
||||
# still-present dGPU), so those are not touched. The web runner
|
||||
# has no good way to render a multi-select checklist, so the
|
||||
# eligible ones are auto-removed and reported verbatim in the log.
|
||||
if declare -F _vm_list_orphan_audio_hostpci >/dev/null 2>&1; then
|
||||
local _orphan_audio _line _o_idx _o_bdf _o_name _removed=""
|
||||
local _vd_hex _dd_hex _vd_id
|
||||
_orphan_audio=$(_vm_list_orphan_audio_hostpci "$vmid" "${SELECTED_PCI_SLOTS[0]}")
|
||||
if [[ -n "$_orphan_audio" ]]; then
|
||||
while IFS= read -r _line; do
|
||||
[[ -z "$_line" ]] && continue
|
||||
_o_idx="${_line%%|*}"
|
||||
_line="${_line#*|}"
|
||||
_o_bdf="${_line%%|*}"
|
||||
_o_name="${_line#*|}"
|
||||
if _vm_remove_hostpci_index "$vmid" "$_o_idx" "$LOG_FILE"; then
|
||||
_removed+=" • hostpci${_o_idx}: ${_o_bdf} ${_o_name}\n"
|
||||
|
||||
# Fix B: also surface the audio's vendor:device to the
|
||||
# upcoming vfio.conf cleanup if no other VM still uses
|
||||
# this BDF. Ensures e.g. 8086:51c8 (Intel chipset audio)
|
||||
# is stripped from /etc/modprobe.d/vfio.conf when the
|
||||
# iGPU it was paired with leaves VM mode.
|
||||
if declare -F _pci_bdf_in_any_vm >/dev/null 2>&1 \
|
||||
&& ! _pci_bdf_in_any_vm "$_o_bdf" "${VM_AFFECTED_IDS[@]}"; then
|
||||
_vd_hex=$(cat "/sys/bus/pci/devices/${_o_bdf}/vendor" 2>/dev/null | sed 's/^0x//')
|
||||
_dd_hex=$(cat "/sys/bus/pci/devices/${_o_bdf}/device" 2>/dev/null | sed 's/^0x//')
|
||||
if [[ -n "$_vd_hex" && -n "$_dd_hex" ]]; then
|
||||
_vd_id="${_vd_hex}:${_dd_hex}"
|
||||
if ! _contains_in_array "$_vd_id" "${SELECTED_IOMMU_IDS[@]}"; then
|
||||
SELECTED_IOMMU_IDS+=("$_vd_id")
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
done <<< "$_orphan_audio"
|
||||
if [[ -n "$_removed" ]]; then
|
||||
msg_ok "$(translate 'Associated audio removed from VM'): ${vmid}" \
|
||||
| tee -a "$screen_capture"
|
||||
echo -e "$_removed" | tee -a "$screen_capture"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
done
|
||||
}
|
||||
@@ -1147,6 +1270,12 @@ main() {
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# SR-IOV guard: refuse to toggle the driver on a VF or on a PF with
|
||||
# active VFs. Manual handling via Proxmox web UI is required.
|
||||
if ! check_sriov_and_block_if_needed; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Validate if GPU is blocked for VM mode (certain Intel GPUs)
|
||||
if ! validate_vm_mode_blocked_ids; then
|
||||
exit 1
|
||||
|
||||
@@ -114,6 +114,33 @@ check_monitor_status() {
|
||||
fi
|
||||
}
|
||||
|
||||
is_beta_program_active() {
    # Succeeds (status 0) only when CONFIG_FILE exists as a regular file and
    # its .beta_program.status value, as read by jq, is exactly "active".
    # A missing file, a jq failure, or any other value yields status 1.
    if [[ ! -f "$CONFIG_FILE" ]]; then
        return 1
    fi

    local beta_status
    # "// empty" makes jq print nothing when the key is absent or null.
    beta_status=$(jq -r '.beta_program.status // empty' "$CONFIG_FILE" 2>/dev/null)

    if [[ "$beta_status" == "active" ]]; then
        return 0
    fi
    return 1
}
|
||||
|
||||
deactivate_beta_program() {
    # Asks the user to confirm leaving the beta program and, on "yes",
    # rewrites CONFIG_FILE with .beta_program.status set to "inactive",
    # then reports success or failure in a dialog msgbox.
    #
    # Globals read: CONFIG_FILE (path of the JSON config rewritten via jq).
    # Returns: 0 when the user declines; otherwise the exit status of the
    #          final msgbox.
    if ! dialog --clear --backtitle "ProxMenux Configuration" \
        --title "$(translate "Deactivate Beta Program")" \
        --yesno "\n$(translate "You will stop receiving beta update prompts. Stable updates continue normally.\n\nTo rejoin the beta program later, run the beta installer again.\n\nDeactivate now?")" 14 64; then
        return 0
    fi

    local tmp updated=false

    # Create the temp file next to the config so the final mv is a rename
    # on the same filesystem instead of a cross-filesystem copy from the
    # default temp directory.
    tmp=$(mktemp "${CONFIG_FILE}.XXXXXX" 2>/dev/null) || tmp=""

    if [[ -n "$tmp" ]] && jq '.beta_program.status = "inactive"' "$CONFIG_FILE" > "$tmp" 2>/dev/null; then
        # mktemp creates the file with mode 0600; copy the original mode so
        # replacing the config does not silently tighten its permissions.
        chmod --reference="$CONFIG_FILE" "$tmp" 2>/dev/null || true
        # Only report success when the config was actually replaced.
        if mv -f "$tmp" "$CONFIG_FILE" 2>/dev/null; then
            updated=true
        fi
    fi

    if [[ "$updated" == true ]]; then
        dialog --clear --backtitle "ProxMenux Configuration" \
            --title "$(translate "Beta Program Deactivated")" \
            --msgbox "\n\n$(translate "Beta program deactivated. You will now receive stable updates only.")" 10 60
    else
        # Drop any leftover temp file before telling the user it failed.
        [[ -n "$tmp" ]] && rm -f "$tmp"
        dialog --clear --backtitle "ProxMenux Configuration" \
            --title "$(translate "Error")" \
            --msgbox "\n\n$(translate "Could not update config file.")" 10 50
    fi
}
|
||||
|
||||
toggle_monitor_service() {
|
||||
local status=$(check_monitor_status)
|
||||
|
||||
@@ -211,7 +238,13 @@ show_config_menu() {
|
||||
option_actions[$option_num]="show_monitor_status"
|
||||
((option_num++))
|
||||
fi
|
||||
|
||||
|
||||
if is_beta_program_active; then
|
||||
menu_options+=("$option_num" "$(translate "Deactivate Beta Program")")
|
||||
option_actions[$option_num]="deactivate_beta"
|
||||
((option_num++))
|
||||
fi
|
||||
|
||||
# Build menu based on installation type
|
||||
if [ "$install_type" = "translation" ]; then
|
||||
menu_options+=("$option_num" "$(translate "Change Language")")
|
||||
@@ -256,6 +289,9 @@ show_config_menu() {
|
||||
"show_monitor_status")
|
||||
show_monitor_status
|
||||
;;
|
||||
"deactivate_beta")
|
||||
deactivate_beta_program
|
||||
;;
|
||||
"change_language")
|
||||
change_language
|
||||
;;
|
||||
|
||||
@@ -27,7 +27,7 @@ initialize_cache
|
||||
while true; do
|
||||
OPTION=$(dialog --colors --backtitle "ProxMenux" \
|
||||
--title "$(translate "GPUs and Coral-TPU Menu")" \
|
||||
--menu "\n$(translate "Select an option:")" 24 78 16 \
|
||||
--menu "\n$(translate "Select an option:")" 26 78 16 \
|
||||
"" "\Z4──────────────────────── HOST ─────────────────────────\Zn" \
|
||||
"1" "$(translate "Install/Update NVIDIA Drivers (Host + LXC)")" \
|
||||
"2" "$(translate "Install/Update Coral TPU on Host")" \
|
||||
|
||||
@@ -173,13 +173,28 @@ run_script_by_slug() {
|
||||
credentials=$(format_credentials "$first")
|
||||
|
||||
# Build info message
|
||||
local msg="\Zb\Z4$(translate "Description"):\Zn\n$desc"
|
||||
[[ -n "$notes_dialog" ]] && msg+="\n\n\Zb\Z4$(translate "Notes"):\Zn\n$notes_dialog"
|
||||
local msg="\Zb\Z4$(translate "Description"):\Zn\n$desc"
|
||||
if [[ -n "$notes" ]]; then
|
||||
local notes_short=""
|
||||
local char_count=0
|
||||
local max_chars=400
|
||||
while IFS= read -r line; do
|
||||
[[ -z "$line" ]] && continue
|
||||
char_count=$(( char_count + ${#line} ))
|
||||
if [[ $char_count -lt $max_chars ]]; then
|
||||
notes_short+="• $line\n"
|
||||
else
|
||||
notes_short+="...\n"
|
||||
break
|
||||
fi
|
||||
done <<< "$notes"
|
||||
msg+="\n\n\Zb\Z4$(translate "Notes"):\Zn\n$notes_short"
|
||||
fi
|
||||
[[ -n "$credentials" ]] && msg+="\n\n\Zb\Z4$(translate "Default Credentials"):\Zn\n$credentials"
|
||||
[[ "$port" -gt 0 ]] && msg+="\n\n\Zb\Z4$(translate "Default Port"):\Zn $port"
|
||||
[[ -n "$website" ]] && msg+="\n\Zb\Z4$(translate "Website"):\Zn $website"
|
||||
|
||||
msg+="\n\n$(translate "Choose how to run the script:"):"
|
||||
msg+="\n\n$(translate "Choose how to run the script:")"
|
||||
|
||||
# Build menu: one or two entries per script_info (GH + optional Mirror)
|
||||
declare -a MENU_OPTS=()
|
||||
@@ -383,7 +398,7 @@ while true; do
|
||||
SELECTED_IDX=$(dialog --backtitle "ProxMenux" \
|
||||
--title "Proxmox VE Helper-Scripts" \
|
||||
--menu "$(translate "Select a category or search for scripts:"):" \
|
||||
20 70 14 "${MENU_ITEMS[@]}" 3>&1 1>&2 2>&3) || {
|
||||
22 75 15 "${MENU_ITEMS[@]}" 3>&1 1>&2 2>&3) || {
|
||||
dialog --clear --title "ProxMenux" \
|
||||
--msgbox "\n\n$(translate "Visit the website to discover more scripts, stay updated with the latest updates, and support the project:")\n\nhttps://community-scripts.github.io/ProxmoxVE" 15 70
|
||||
exec bash "$LOCAL_SCRIPTS/menus/main_menu.sh"
|
||||
@@ -425,7 +440,7 @@ while true; do
|
||||
SCRIPT_INDEX=$(dialog --colors --backtitle "ProxMenux" \
|
||||
--title "$(translate "Scripts in") ${CATEGORY_NAMES[$SELECTED]}" \
|
||||
--menu "$(translate "Choose a script to execute:"):" \
|
||||
20 70 14 "${SCRIPTS[@]}" 3>&1 1>&2 2>&3) || break
|
||||
22 75 15 "${SCRIPTS[@]}" 3>&1 1>&2 2>&3) || break
|
||||
|
||||
SCRIPT_SELECTED="${INDEX_TO_SLUG[$SCRIPT_INDEX]}"
|
||||
run_script_by_slug "$SCRIPT_SELECTED"
|
||||
|
||||
@@ -364,6 +364,41 @@ select_controller_nvme() {
|
||||
return 1
|
||||
fi
|
||||
|
||||
# SR-IOV guard: drop VFs / active PFs and inform the user. Same policy
|
||||
# as add_gpu_vm.sh and the VM creators — refuse to rewrite host VFIO
|
||||
# config for an SR-IOV device since it would collapse the VF tree.
|
||||
if declare -F _pci_sriov_filter_array >/dev/null 2>&1; then
|
||||
local sriov_removed=""
|
||||
sriov_removed=$(_pci_sriov_filter_array SELECTED_CONTROLLER_PCIS)
|
||||
if [[ -n "$sriov_removed" ]]; then
|
||||
local sriov_msg=""
|
||||
sriov_msg="\n$(translate "The following devices were excluded because they are part of an SR-IOV configuration:")\n"
|
||||
local entry bdf role first
|
||||
while IFS= read -r entry; do
|
||||
[[ -z "$entry" ]] && continue
|
||||
bdf="${entry%%|*}"
|
||||
role="${entry#*|}"
|
||||
first="${role%% *}"
|
||||
if [[ "$first" == "vf" ]]; then
|
||||
sriov_msg+="\n • ${bdf} — $(translate "Virtual Function")"
|
||||
else
|
||||
sriov_msg+="\n • ${bdf} — $(translate "Physical Function with") ${role#pf-active } $(translate "active VFs")"
|
||||
fi
|
||||
done <<< "$sriov_removed"
|
||||
sriov_msg+="\n\n$(translate "To pass SR-IOV Virtual Functions to a VM, edit the VM configuration manually via the Proxmox web interface.")"
|
||||
dialog --backtitle "ProxMenux" --colors \
|
||||
--title "$(translate "SR-IOV Configuration Detected")" \
|
||||
--msgbox "$sriov_msg" 18 82
|
||||
fi
|
||||
|
||||
if [[ ${#SELECTED_CONTROLLER_PCIS[@]} -eq 0 ]]; then
|
||||
dialog --backtitle "ProxMenux" \
|
||||
--title "$(translate "Controller + NVMe")" \
|
||||
--msgbox "\n$(translate "No eligible controllers remain after SR-IOV filtering.")" 8 70
|
||||
return 1
|
||||
fi
|
||||
fi
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
|
||||
@@ -1255,6 +1255,48 @@ if [[ ${#EFFECTIVE_IMPORT_DISKS[@]} -gt 0 ]]; then
|
||||
done
|
||||
fi
|
||||
|
||||
if [[ ${#CONTROLLER_NVME_PCIS[@]} -gt 0 ]]; then
|
||||
# SR-IOV guard: exclude VFs / active PFs before staging. Mid-flow
|
||||
# phase-2 output; a whiptail msgbox stops the scrolling so the user
|
||||
# actually sees which devices were dropped. After the ack, each
|
||||
# skipped BDF is logged via msg_warn so the action is visible in the
|
||||
# captured log as well.
|
||||
if declare -F _pci_sriov_filter_array >/dev/null 2>&1; then
|
||||
SRIOV_REMOVED=$(_pci_sriov_filter_array CONTROLLER_NVME_PCIS)
|
||||
if [[ -n "$SRIOV_REMOVED" ]]; then
|
||||
SRIOV_MSG=""
|
||||
SRIOV_BDFS=()
|
||||
SRIOV_NL=$'\n'
|
||||
SRIOV_MSG="$(translate "The following devices were excluded from Controller/NVMe passthrough because they are part of an SR-IOV configuration:")"
|
||||
while IFS= read -r SRIOV_ENTRY; do
|
||||
[[ -z "$SRIOV_ENTRY" ]] && continue
|
||||
SRIOV_BDF="${SRIOV_ENTRY%%|*}"
|
||||
SRIOV_ROLE="${SRIOV_ENTRY#*|}"
|
||||
SRIOV_FIRST="${SRIOV_ROLE%% *}"
|
||||
SRIOV_BDFS+=("$SRIOV_BDF")
|
||||
if [[ "$SRIOV_FIRST" == "vf" ]]; then
|
||||
SRIOV_MSG+="${SRIOV_NL} • ${SRIOV_BDF} — $(translate "Virtual Function")"
|
||||
else
|
||||
SRIOV_MSG+="${SRIOV_NL} • ${SRIOV_BDF} — $(translate "Physical Function with") ${SRIOV_ROLE#pf-active } $(translate "active VFs")"
|
||||
fi
|
||||
done <<< "$SRIOV_REMOVED"
|
||||
SRIOV_MSG+="${SRIOV_NL}${SRIOV_NL}$(translate "To pass SR-IOV Virtual Functions to a VM, edit the VM configuration manually via the Proxmox web interface.")"
|
||||
|
||||
whiptail --backtitle "ProxMenux" \
|
||||
--title "$(translate "SR-IOV Configuration Detected")" \
|
||||
--msgbox "$SRIOV_MSG" 18 82
|
||||
|
||||
for SRIOV_SKIPPED in "${SRIOV_BDFS[@]}"; do
|
||||
msg_warn "$(translate "Skipping SR-IOV device"): ${SRIOV_SKIPPED}"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ ${#CONTROLLER_NVME_PCIS[@]} -eq 0 ]]; then
|
||||
msg_warn "$(translate "No eligible Controller/NVMe devices remain after SR-IOV filtering. Skipping.")"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ ${#CONTROLLER_NVME_PCIS[@]} -gt 0 ]]; then
|
||||
local CONTROLLER_CAN_STAGE=true
|
||||
if declare -F _pci_is_iommu_active >/dev/null 2>&1 && ! _pci_is_iommu_active; then
|
||||
|
||||
@@ -468,6 +468,55 @@ fi
|
||||
done
|
||||
fi
|
||||
|
||||
if [[ ${#CONTROLLER_NVME_PCIS[@]} -gt 0 ]]; then
|
||||
# SR-IOV guard: drop Virtual Functions / active-PFs before staging.
|
||||
# Proxmox's VFIO rebind via qm hostpci would trigger the same VF-tree
|
||||
# collapse described in the GPU flows, so we exclude them and tell
|
||||
# the user to manage those passthroughs manually.
|
||||
#
|
||||
# UI choice: this runs mid-flow (phase 2 of the wizard, interleaved
|
||||
# with msg_info/msg_ok output), so a whiptail msgbox is used to force
|
||||
# the user to acknowledge the exclusion instead of letting the notice
|
||||
# scroll by with the rest of the processing output. After the user
|
||||
# clicks OK, a per-device msg_warn is emitted so the skipped BDFs
|
||||
# remain visible in the captured log.
|
||||
if declare -F _pci_sriov_filter_array >/dev/null 2>&1; then
|
||||
local _sriov_removed=""
|
||||
_sriov_removed=$(_pci_sriov_filter_array CONTROLLER_NVME_PCIS)
|
||||
if [[ -n "$_sriov_removed" ]]; then
|
||||
local _sriov_msg="" _entry _bdf _role _first _sb
|
||||
local -a _sriov_bdfs=()
|
||||
local _nl=$'\n'
|
||||
_sriov_msg="$(translate "The following devices were excluded from Controller/NVMe passthrough because they are part of an SR-IOV configuration:")"
|
||||
while IFS= read -r _entry; do
|
||||
[[ -z "$_entry" ]] && continue
|
||||
_bdf="${_entry%%|*}"
|
||||
_role="${_entry#*|}"
|
||||
_first="${_role%% *}"
|
||||
_sriov_bdfs+=("$_bdf")
|
||||
if [[ "$_first" == "vf" ]]; then
|
||||
_sriov_msg+="${_nl} • ${_bdf} — $(translate "Virtual Function")"
|
||||
else
|
||||
_sriov_msg+="${_nl} • ${_bdf} — $(translate "Physical Function with") ${_role#pf-active } $(translate "active VFs")"
|
||||
fi
|
||||
done <<< "$_sriov_removed"
|
||||
_sriov_msg+="${_nl}${_nl}$(translate "To pass SR-IOV Virtual Functions to a VM, edit the VM configuration manually via the Proxmox web interface.")"
|
||||
|
||||
whiptail --backtitle "ProxMenux" \
|
||||
--title "$(translate "SR-IOV Configuration Detected")" \
|
||||
--msgbox "$_sriov_msg" 18 82
|
||||
|
||||
for _sb in "${_sriov_bdfs[@]}"; do
|
||||
msg_warn "$(translate "Skipping SR-IOV device"): ${_sb}"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ ${#CONTROLLER_NVME_PCIS[@]} -eq 0 ]]; then
|
||||
msg_warn "$(translate "No eligible Controller/NVMe devices remain after SR-IOV filtering. Skipping.")"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ ${#CONTROLLER_NVME_PCIS[@]} -gt 0 ]]; then
|
||||
local CONTROLLER_CAN_STAGE=true
|
||||
if declare -F _pci_is_iommu_active >/dev/null 2>&1 && ! _pci_is_iommu_active; then
|
||||
|
||||
@@ -1270,6 +1270,48 @@ function create_vm() {
|
||||
done
|
||||
fi
|
||||
|
||||
if [[ ${#CONTROLLER_NVME_PCIS[@]} -gt 0 ]]; then
|
||||
# SR-IOV guard: mirror of the synology.sh/vm_creator.sh block —
|
||||
# drop VFs and active-PF devices before staging so Proxmox does
|
||||
# not collapse the VF tree at VM start. Mid-flow, so the notice
|
||||
# goes through whiptail (blocking acknowledgment) and each
|
||||
# skipped BDF is then echoed via msg_warn for the log trail.
|
||||
if declare -F _pci_sriov_filter_array >/dev/null 2>&1; then
|
||||
SRIOV_REMOVED=$(_pci_sriov_filter_array CONTROLLER_NVME_PCIS)
|
||||
if [[ -n "$SRIOV_REMOVED" ]]; then
|
||||
SRIOV_MSG=""
|
||||
SRIOV_BDFS=()
|
||||
SRIOV_NL=$'\n'
|
||||
SRIOV_MSG="$(translate "The following devices were excluded from Controller/NVMe passthrough because they are part of an SR-IOV configuration:")"
|
||||
while IFS= read -r SRIOV_ENTRY; do
|
||||
[[ -z "$SRIOV_ENTRY" ]] && continue
|
||||
SRIOV_BDF="${SRIOV_ENTRY%%|*}"
|
||||
SRIOV_ROLE="${SRIOV_ENTRY#*|}"
|
||||
SRIOV_FIRST="${SRIOV_ROLE%% *}"
|
||||
SRIOV_BDFS+=("$SRIOV_BDF")
|
||||
if [[ "$SRIOV_FIRST" == "vf" ]]; then
|
||||
SRIOV_MSG+="${SRIOV_NL} • ${SRIOV_BDF} — $(translate "Virtual Function")"
|
||||
else
|
||||
SRIOV_MSG+="${SRIOV_NL} • ${SRIOV_BDF} — $(translate "Physical Function with") ${SRIOV_ROLE#pf-active } $(translate "active VFs")"
|
||||
fi
|
||||
done <<< "$SRIOV_REMOVED"
|
||||
SRIOV_MSG+="${SRIOV_NL}${SRIOV_NL}$(translate "To pass SR-IOV Virtual Functions to a VM, edit the VM configuration manually via the Proxmox web interface.")"
|
||||
|
||||
whiptail --backtitle "ProxMenux" \
|
||||
--title "$(translate "SR-IOV Configuration Detected")" \
|
||||
--msgbox "$SRIOV_MSG" 18 82
|
||||
|
||||
for SRIOV_SKIPPED in "${SRIOV_BDFS[@]}"; do
|
||||
msg_warn "$(translate "Skipping SR-IOV device"): ${SRIOV_SKIPPED}"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ ${#CONTROLLER_NVME_PCIS[@]} -eq 0 ]]; then
|
||||
msg_warn "$(translate "No eligible Controller/NVMe devices remain after SR-IOV filtering. Skipping.")"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ ${#CONTROLLER_NVME_PCIS[@]} -gt 0 ]]; then
|
||||
local CONTROLLER_CAN_STAGE=true
|
||||
if declare -F _pci_is_iommu_active >/dev/null 2>&1 && ! _pci_is_iommu_active; then
|
||||
|
||||
+1
-1
@@ -1 +1 @@
|
||||
1.2.0
|
||||
1.2.1
|
||||
|
||||
Reference in New Issue
Block a user