Spotfire Filter Audit Scripts

def SetFilterTypes():
    # ***********************************************
    # Purpose:
    #    Converts all string column filters to:
    #      1) "Listbox" if more than 7 discrete values
    #      2) "Checkbox" if 7 or less discrete values
    #      3) "Checkbox" always for "Cohort+Dose Level" or "Cohort + Dose Level"
    #      4) "Listbox" always for "Record ID"
    #      5) "Listbox" always for "Subject List"
    #      6) "Range Filter" for "Age"
    # ***********************************************
    from Spotfire.Dxp.Application.Filters import FilterTypeIdentifiers, CheckBoxHierarchyFilter
    from Spotfire.Dxp.Application.Visuals import HtmlTextArea
    from Spotfire.Dxp.Data import DataValueCursor
    
    # ================================================================
    # CONFIGURATION
    # ================================================================
    
    # Always APPLY changes in this script
    applyChanges = True
    
    # Exact-name exceptions that must always be CheckBox (regardless of data table)
    exceptions = set(["COHORT+DOSELEVEL"])
    
    # Exact-name columns that must always be RangeFilter
    range_columns = set(["Age"])
    
    # NEW: partial match list for RangeFilter
    # If any of these substrings is found in the column name (case-insensitive), the expected type becomes RangeFilter.
    partial_match_range_exceptions = ["Age", "Sum of Diameter","% Change from Baseline","% Change from Nadir"]
    
    # Exact-name columns that must always be ListBoxFilter
    listbox_columns = set(["Record ID", "Subject List"])
    
    # Page names to skip (setup pages)
    skip_pages = set(["Dashboard", "General Help"])
    
    # ================================================================
    # INTERNAL STATE
    # ================================================================
    
    unique_cache = {}
    max_distinct_by_column = {}
    
    # pageTitle -> list of rows (only filters we attempted to change)
    updated_rows = {}
    
    
    def get_unique_count(dataTable, tableName, columnName):
        key = (tableName, columnName)
        if key in unique_cache:
            return unique_cache[key]
    
        if not dataTable.Columns.Contains(columnName):
            unique_cache[key] = -1
            return -1
    
        col = dataTable.Columns[columnName]
        cursor = DataValueCursor.CreateFormatted(col)
        distinctRows = dataTable.GetDistinctRows(None, cursor)
        cnt = 0
        distinctRows.Reset()
        while distinctRows.MoveNext():
            cnt += 1
    
        unique_cache[key] = cnt
        return cnt
    
    
    # ================================================================
    # PASS 1: compute max distinct count per column name
    # ================================================================
    
    for i in range(Document.Pages.Count):
        page = Document.Pages[i]
        if not page.Visible:
            continue
        if page.Title in skip_pages:
            continue
    
        filterPanel = page.FilterPanel
        for tableGroup in filterPanel.TableGroups:
            if not tableGroup.Visible:
                continue
    
            tableName = tableGroup.Name
            dataTable = Document.Data.Tables[tableName]
    
            # Top-level filters
            for fh in tableGroup.FilterHandles:
                if not fh.Visible:
                    continue
                f = fh.FilterReference
                if f is None or isinstance(f, CheckBoxHierarchyFilter):
                    continue
                colName = f.Name
                cnt = get_unique_count(dataTable, tableName, colName)
                if cnt >= 0:
                    prev = max_distinct_by_column.get(colName, -1)
                    if cnt > prev:
                        max_distinct_by_column[colName] = cnt
    
            # One-level subgroups
            for fg in tableGroup.SubGroups:
                for fh2 in fg.FilterHandles:
                    if not fh2.Visible:
                        continue
                    f2 = fh2.FilterReference
                    if f2 is None or isinstance(f2, CheckBoxHierarchyFilter):
                        continue
                    colName2 = f2.Name
                    cnt2 = get_unique_count(dataTable, tableName, colName2)
                    if cnt2 >= 0:
                        prev2 = max_distinct_by_column.get(colName2, -1)
                        if cnt2 > prev2:
                            max_distinct_by_column[colName2] = cnt2
    
    
    # ================================================================
    # Helper for PASS 2 (apply + log updates only)
    # ================================================================
    
    def process_filter_handle(fh, pageTitle, tableName, dataTable, updated_rows):
    
        if not fh.Visible:
            return
    
        f = fh.FilterReference
        if f is None or isinstance(f, CheckBoxHierarchyFilter):
            return
    
        colName = f.Name
        colName_lower = colName.lower()
    
        uniqueCount = get_unique_count(dataTable, tableName, colName)
        globalDistinct = max_distinct_by_column.get(colName, uniqueCount)
    
        expectedType = None
        expectedCheckbox = False
    
        # 1) Column-only and partial-match rules (highest priority)
        # 1a) Exact range_columns
        if colName in range_columns:
            expectedType = "RangeFilter"
    
        # 1b) Partial-match range exceptions (case-insensitive)
        elif any(sub.lower() in colName_lower for sub in partial_match_range_exceptions):
            expectedType = "RangeFilter"
    
        # 1c) Exact checkbox exceptions
        elif colName in exceptions:
            expectedCheckbox = True
            expectedType = "CheckBoxFilter"
    
        # 1d) Exact listbox columns
        elif colName in listbox_columns:
            expectedType = "ListBoxFilter"
    
        # 2) Fallback: globalDistinct logic
        else:
            # globalDistinct <= 7 => CheckBox, else ListBox
            expectedCheckbox = (globalDistinct != -1 and globalDistinct <= 7)
            expectedType = "CheckBoxFilter" if expectedCheckbox else "ListBoxFilter"
    
        # Current type BEFORE change
        if getattr(f, "TypeId", None) is None:
            currentType = ""
        else:
            currentType = str(
                f.TypeId.Name.replace("Spotfire.", "").replace("Filter", "")
            )
    
        expectedTypeForCompare = expectedType.replace("Filter", "")
    
        attemptedChange = False
        changeSucceeded = True
    
        # Apply only when different and TypeId exists
        if applyChanges and expectedTypeForCompare != "":
            if currentType != expectedTypeForCompare and getattr(f, "TypeId", None) is not None:
                attemptedChange = True
                try:
                    if expectedType == "RangeFilter":
                        f.TypeId = FilterTypeIdentifiers.RangeFilter
                    elif expectedType == "CheckBoxFilter":
                        f.TypeId = FilterTypeIdentifiers.CheckBoxFilter
                    elif expectedType == "ListBoxFilter":
                        f.TypeId = FilterTypeIdentifiers.ListBoxFilter
    
                    # refresh after change
                    currentType = str(
                        f.TypeId.Name.replace("Spotfire.", "").replace("Filter", "")
                    )
                    changeSucceeded = (currentType == expectedTypeForCompare)
                except:
                    changeSucceeded = False
    
        # Log only if a change was attempted
        if attemptedChange:
            if pageTitle not in updated_rows:
                updated_rows[pageTitle] = []
            color = "green" if changeSucceeded else "red"
            updated_rows[pageTitle].append({
                "Column": colName,
                "UniqueCount": uniqueCount,
                "CurrentType": currentType,
                "ExpectedType": expectedTypeForCompare,
                "Color": color,
                "Succeeded": changeSucceeded
            })
    
    
    # ================================================================
    # PASS 2: apply + collect updates
    # ================================================================
    
    for i in range(Document.Pages.Count):
        page = Document.Pages[i]
        if not page.Visible:
            continue
        if page.Title in skip_pages:
            continue
    
        pageTitle = page.Title
        filterPanel = page.FilterPanel
    
        for tableGroup in filterPanel.TableGroups:
            if not tableGroup.Visible:
                continue
    
            tableName = tableGroup.Name
            dataTable = Document.Data.Tables[tableName]
    
            for fh in tableGroup.FilterHandles:
                process_filter_handle(fh, pageTitle, tableName, dataTable, updated_rows)
    
            for fg in tableGroup.SubGroups:
                for fh2 in fg.FilterHandles:
                    process_filter_handle(fh2, pageTitle, tableName, dataTable, updated_rows)
    
    
    # ================================================================
    # BUILD HTML: updates only
    # ================================================================
    
    html_parts = []
    
    html_parts.append(
        '<span style="font-weight:bold;font-size:13px;color:blue;">Updated Filter Types</span><br>'
    )
    
    for i in range(Document.Pages.Count):
        page = Document.Pages[i]
        if not page.Visible or page.Title in skip_pages:
            continue
        pageTitle = page.Title
        if pageTitle not in updated_rows:
            continue
    
        html_parts.append(
            '<span style="font-weight:bold;font-size:12px;color:blue;">Page: {}</span><br>'.format(pageTitle)
        )
    
        for row in updated_rows[pageTitle]:
            colName = row["Column"]
            uniqueCount = row["UniqueCount"]
            currentType = row["CurrentType"]
            expectedType = row["ExpectedType"]
            color = row["Color"]  # green if success, red if failure
    
            line = (
                '<span style="margin-left:20px;">'
                'Filter: <b>{col}</b>; '
                'Unique Count: {uc}; '
                'Current Type: <span style="color:{c}">{ct}</span>; '
                'Expected Type: {et}'
                '</span><br>'
            ).format(
                col=colName,
                uc=uniqueCount,
                c=color,
                ct=currentType,
                et=expectedType
            )
            html_parts.append(line)
    
        html_parts.append('<br>')

    HtmlOut = "".join(html_parts)
    SpecLogOutput("Set Filter Types", HtmlOut)