Spaces:

stable-bias
/

diffusion-face-clustering

Runtime error

App Files Files Community

anonymous-authors committed on Aug 11, 2023

Commit

e79434d

1 Parent(s): 7cf5700

Create app.py

Browse files

Files changed (1) hide show

app.py +289 -0

app.py ADDED Viewed

	@@ -0,0 +1,289 @@

+import json
+import gradio as gr
+import os
+from PIL import Image
+import plotly.graph_objects as go
+import plotly.express as px
+import operator
+TITLE = "Identity Representation in Diffusion Models"
+_INTRO = """
+# Identity Representation in Diffusion Models
+Explore the data generated from [DiffusionBiasExplorer](https://huggingface.co/spaces/tti-bias/DiffusionBiasExplorer)!
+This demo showcases patterns in images generated by Stable Diffusion and Dalle-2 systems.
+Specifically, images obtained from prompt inputs that span various gender- and ethnicity-related terms are clustered to show how those shape visual representations (more details below).
+We encourage users to take advantage of this app to explore those trends, for example through the lens of the following questions:
+- Find the cluster that has the most prompts denoting a gender or ethnicity that you identify with. Do you think the generated images look like you?
+- Find two clusters that have a similar distribution of gender terms but different distributions of ethnicity terms. Do you see any meaningful differences in how gender is visually represented?
+- Do you find that some ethnicity terms lead to more stereotypical visual representations than others?
+- Do you find that some gender terms lead to more stereotypical visual representations than others?
+These questions only scratch the surface of what we can learn from demos like this one,
+let us know what you find [in the discussions tab](https://huggingface.co/spaces/tti-bias/DiffusionFaceClustering/discussions),
+or if you think of other relevant questions!
+"""
+_CONTEXT = """
+##### How do diffusion-based models represent gender and ethnicity?
+In order to evaluate the *social biases* that Text-to-Image (TTI) systems may reproduce or exacerbate,
+we need to first understand how the visual representations they generate relate to notions of gender and ethnicity.
+These two aspects of a person's identity, however, are known as **socially constructed characteristics**:
+that is to say, gender and ethnicity only exist in interactions between people, they do not have an independent existence based solely on physical (or visual) attributes.
+This means that while we can characterize trends in how the models associate visual features with specific *identity terms in the generation prompts*,
+we should not assign a specific gender or ethnicity to a synthetic figure generated by an ML model.
+In this app, we instead take a 2-step clustering-based approach. First, we generate 680 images for each model by varying mentions of terms that denote gender or ethnicity in the prompts.
+Then, we use a [VQA-based model](https://huggingface.co/Salesforce/blip-vqa-base) to cluster these images at different granularities (12, 24, or 48 clusters).
+Exploring these clusters allows us to examine trends in the models' associations between visual features and textual representation of social attributes.
+**Note:** this demo was developed with a limited set of gender- and ethnicity-related terms that are more relevant to the US context as a first approach,
+so users may not always find themselves represented.
+"""
def _load_clusters(path):
    """Return the parsed JSON content of one precomputed cluster file."""
    # A context manager closes the handle as soon as parsing is done; the
    # previous `json.load(open(...))` left closing to the garbage collector.
    # JSON is read as UTF-8 rather than the platform's default encoding.
    with open(path, encoding="utf-8") as cluster_file:
        return json.load(cluster_file)


# One precomputed clustering per supported granularity.
clusters_12 = _load_clusters("clusters/id_all_blip_clusters_12.json")
clusters_24 = _load_clusters("clusters/id_all_blip_clusters_24.json")
clusters_48 = _load_clusters("clusters/id_all_blip_clusters_48.json")

# Lookup from the number of clusters to the corresponding clustering.
clusters_by_size = {
    12: clusters_12,
    24: clusters_24,
    48: clusters_48,
}
def to_string(label):
    """Return the display text for a raw label.

    Known system names, gender terms and the ``"gender"`` block name are
    mapped to their human-readable (HTML) form; any other value is returned
    unchanged.
    """
    display_names = (
        ("SD_2", "Stable Diffusion 2.0"),
        ("SD_14", "Stable Diffusion 1.4"),
        ("DallE", "Dall-E 2"),
        ("non-binary", "non-binary person"),
        ("person", "<i>unmarked</i> (person)"),
        ("", "<i>unmarked</i> ()"),
        ("gender", "gender term"),
    )
    # Compared with `==` in order, first match wins.
    for raw_name, pretty_name in display_names:
        if label == raw_name:
            return pretty_name
    return label
def summarize_clusters(clusters_list, max_terms=3):
    """Attach a one-line label and a text summary to every cluster, in place.

    Two keys are written into each cluster dict:

    * ``sentence_desc`` - a single-line label naming the first one or two
      gender terms and ethnicity terms.
    * ``summary_desc`` - a multi-line summary with the image count and the
      first one or two gender/ethnicity terms together with their counts.

    Args:
        clusters_list: list of cluster dicts. Each must provide
            ``img_path_list``, ``labels_gender`` and ``labels_ethnicity``;
            the two label lists are indexed as sequences of
            ``(term, count)`` pairs and must be non-empty.
        max_terms: accepted for interface compatibility; the body does not
            read it (at most two terms are ever reported).

    Returns:
        None. The cluster dicts are modified in place.
    """

    def gender_term(entry, unmarked):
        # Any occurrence of "person" in the term is swapped for the marker.
        return entry[0].replace("person", unmarked)

    def ethnicity_term(entry, unmarked):
        # An empty term is shown as the marker instead of a blank.
        return unmarked if entry[0] == "" else entry[0]

    for index, cluster in enumerate(clusters_list):
        image_count = len(cluster["img_path_list"])
        genders = cluster["labels_gender"]
        ethnicities = cluster["labels_ethnicity"]

        # --- one-line label ---
        first_gender = gender_term(genders[0], "unmarked(gender)")
        label = f"Cluster {index} | \tgender terms incl.: {first_gender}"
        if len(genders) > 1:
            second_gender = gender_term(genders[1], "unmarked(gender)")
            label += f" - {second_gender} | "
        else:
            label += " | "
        first_ethnicity = ethnicity_term(ethnicities[0], "unmarked(ethnicity)")
        label += f"ethnicity terms incl.: {first_ethnicity}"
        if len(ethnicities) > 1:
            second_ethnicity = ethnicity_term(ethnicities[1], "unmarked(ethnicity)")
            label += f" - {second_ethnicity}"
        cluster["sentence_desc"] = label

        # --- multi-line summary ---
        top_gender = gender_term(genders[0], "unmarked")
        summary = [
            f"Cluster {index} has {image_count} images.\n",
            f"- The most represented gender terms are {top_gender} ({genders[0][1]})",
        ]
        if len(genders) > 1:
            next_gender = gender_term(genders[1], "unmarked")
            summary.append(f" and {next_gender} ({genders[1][1]}).\n")
        else:
            summary.append(".\n")
        top_ethnicity = ethnicity_term(ethnicities[0], "unmarked")
        summary.append(
            f"- The most represented ethnicity terms are {top_ethnicity} ({ethnicities[0][1]})"
        )
        if len(ethnicities) > 1:
            next_ethnicity = ethnicity_term(ethnicities[1], "unmarked")
            summary.append(f" and {next_ethnicity} ({ethnicities[1][1]}).\n")
        else:
            summary.append(".\n")
        summary.append("See below for a more detailed description.")
        cluster["summary_desc"] = "".join(summary)
# Add the "sentence_desc" / "summary_desc" texts to every cluster of every
# granularity; summarize_clusters mutates the cluster dicts in place.
for clusters_list in clusters_by_size.values():
    summarize_clusters(clusters_list)

# Dropdown choices per granularity: the one-line label of each cluster, in
# cluster order, so a label's list position is also its cluster index.
dropdown_descs = {
    num_clusters: [cl_dct["sentence_desc"] for cl_dct in clusters_list]
    for num_clusters, clusters_list in clusters_by_size.items()
}
def describe_cluster(cl_dict, block="label", max_items=4):
    """Build an HTML snippet describing how labels are distributed in a cluster.

    Args:
        cl_dict: mapping of label -> count for one attribute of a cluster.
        block: name of the attribute being described; it is passed through
            ``to_string`` before being inserted in the text.
        max_items: maximum number of runner-up labels listed individually.
            Any labels beyond that are aggregated into an "Other terms" line.

    Returns:
        An HTML string naming the most represented label with its rounded
        percentage, followed (when there is more than one label) by a
        paragraph listing the runner-up labels. If the mapping is empty or
        all counts are zero, a short "no data" message is returned instead
        of raising.
    """
    total = float(sum(cl_dict.values()))
    if total == 0:
        # Covers an empty mapping (previously IndexError) and all-zero
        # counts (previously ZeroDivisionError).
        return "<span>No %s data is available for this cluster.</span>" % (
            to_string(block),
        )

    # Ascending sort followed by a reversal, rather than `reverse=True`, so
    # labels with equal counts keep the relative order they had before.
    ranked = sorted(cl_dict.items(), key=operator.itemgetter(1))
    ranked.reverse()
    percentages = [
        (label, round(count * 100 / total, 0)) for label, count in ranked
    ]

    top_label, top_share = percentages[0]
    parts = [
        "<span>The most represented %s is <b>%s</b>, making up about <b>%d%%</b> of the cluster.</span>"
        % (to_string(block), to_string(top_label), top_share)
    ]

    # Only open the "followed by" paragraph when something follows; a
    # single-label cluster used to render an empty, dangling sentence.
    if len(percentages) > 1:
        parts.append("<p>This is followed by: ")
        parts.extend(
            "<BR/><b>%s:</b> %d%%" % (to_string(label), share)
            for label, share in percentages[1 : 1 + max_items]
        )
        remainder = percentages[1 + max_items :]
        if remainder:
            parts.append(
                "<BR/><b> - Other terms:</b> %d%%"
                % (sum(share for _, share in remainder),)
            )
        parts.append("</p>")
    return "".join(parts)
def show_cluster(cl_id, num_clusters):
    """Gather everything the UI shows for one cluster.

    Args:
        cl_id: the selected dropdown entry (a cluster's ``sentence_desc``).
            A falsy value, or one that is not among the entries for
            ``num_clusters``, selects cluster 0.
        num_clusters: clustering granularity used as key into
            ``clusters_by_size`` / ``dropdown_descs``; a falsy value
            selects 12.

    Returns:
        A 9-tuple, in the order expected by the event outputs:
        cluster summary text, gender figure, gender description HTML,
        system figure, system description HTML, ethnicity figure,
        ethnicity description HTML, a list of ``(image, caption)`` pairs
        for the gallery, and a ``gr.update`` carrying the dropdown choices
        for the selected granularity.
    """
    # Resolve the granularity first: the dropdown lookup right below is
    # keyed by it, so defaulting afterwards would raise KeyError.
    if not num_clusters:
        num_clusters = 12
    choices = dropdown_descs[num_clusters]

    # Map the dropdown text back to the cluster index.
    try:
        cl_id = choices.index(cl_id) if cl_id else 0
    except ValueError:
        # e.g. a label left over from another granularity.
        cl_id = 0
    cl_dct = clusters_by_size[num_clusters][cl_id]

    images = []
    # Show up to 8 images; slicing tolerates clusters holding fewer.
    for stored_path in cl_dct["img_path_list"][:8]:
        # Stored paths use "//" as separator; the first three components
        # are dropped to get a path usable from the app's working directory
        # (presumably a prefix from where the clusters were computed).
        img_path = "/".join(
            [st.replace("/", "") for st in stored_path.split("//")][3:]
        )
        im = Image.open(img_path)
        path_parts = img_path.split("/")
        # Caption = top-level folder + file name, with dataset folder names
        # shortened to a system tag and the prompt prefix removed.
        caption = (
            "_".join([path_parts[0], path_parts[-1]])
            .replace("Photo_portrait_of_an_", "")
            .replace("Photo_portrait_of_a_", "")
            .replace("SD_v2_random_seeds_identity_", "(SD v.2) ")
            .replace("dataset-identities-dalle2_", "(Dall-E 2) ")
            .replace("SD_v1.4_random_seeds_identity_", "(SD v.1.4) ")
            .replace("_", " ")
        )
        images.append((im, caption))

    # Each label list is converted to a dict once and reused for both the
    # figure and its textual description.
    model_counts = dict(cl_dct["labels_model"])
    gender_counts = dict(cl_dct["labels_gender"])
    ethnicity_counts = dict(cl_dct["labels_ethnicity"])

    model_fig = go.Figure()
    model_fig.add_trace(
        go.Pie(
            labels=list(model_counts.keys()),
            values=list(model_counts.values()),
        )
    )
    model_description = describe_cluster(model_counts, "system")

    gender_fig = go.Figure()
    gender_fig.add_trace(
        go.Pie(
            labels=list(gender_counts.keys()),
            values=list(gender_counts.values()),
        )
    )
    gender_description = describe_cluster(gender_counts, "gender")

    ethnicity_fig = go.Figure()
    ethnicity_fig.add_trace(
        go.Bar(
            x=list(ethnicity_counts.keys()),
            y=list(ethnicity_counts.values()),
            marker_color=px.colors.qualitative.G10,
        )
    )
    ethnicity_description = describe_cluster(ethnicity_counts, "ethnicity")

    return (
        cl_dct["summary_desc"],
        gender_fig,
        gender_description,
        model_fig,
        model_description,
        ethnicity_fig,
        ethnicity_description,
        images,
        gr.update(choices=choices),
    )
# Page layout and event wiring for the cluster explorer.
with gr.Blocks(title=TITLE) as demo:
    gr.Markdown(_INTRO)
    with gr.Accordion(
        "How do diffusion-based models represent gender and ethnicity?", open=False
    ):
        gr.Markdown(_CONTEXT)
    # Both declarations live inside the style attribute; previously
    # `font-size:smaller` sat outside the quotes, which is malformed HTML.
    gr.HTML(
        """<span style="color:red; font-size:smaller">⚠️ DISCLAIMER: the images displayed by this tool were generated by text-to-image systems and may depict offensive stereotypes or contain explicit content.</span>"""
    )
    num_clusters = gr.Radio(
        [12, 24, 48],
        value=12,
        label="How many clusters do you want to make from the data?",
    )
    with gr.Row():
        with gr.Column():
            cluster_choices = dropdown_descs[num_clusters.value]
            cluster_id = gr.Dropdown(
                choices=cluster_choices,
                # Start on the first cluster's label: the former `value=0`
                # was not one of the (string) choices. show_cluster maps
                # either value to cluster 0.
                value=cluster_choices[0],
                label="Select cluster to visualize:",
            )
            a = gr.Text(label="Cluster summary")
        with gr.Column():
            # NOTE(review): `.style()` was deprecated in later Gradio 3.x
            # releases and removed in 4.0 - confirm against the Gradio
            # version pinned for this Space before upgrading.
            gallery = gr.Gallery(label="Most representative images in cluster").style(
                grid=[2, 4], height="auto"
            )
    with gr.Row():
        with gr.Column():
            c = gr.Plot(label="How many images from each system?")
            c_desc = gr.HTML(label="")
        with gr.Column(scale=1):
            b = gr.Plot(label="Which gender terms are represented?")
            b_desc = gr.HTML(label="")
        with gr.Column(scale=2):
            d = gr.Plot(label="Which ethnicity terms are present?")
            d_desc = gr.HTML(label="")
    gr.Markdown(
        "### Plot Descriptions \n\n"
        + " The **System makeup** plot (*left*) corresponds to the number of images from the cluster that come from each of the TTI systems that we are comparing: Dall-E 2, Stable Diffusion v.1.4. and Stable Diffusion v.2.\n\n"
        + " The **Gender term makeup** plot (*middle*) shows the number of images based on the input prompts that used the phrases man, woman, non-binary person, and person (unmarked) to describe the figure's gender.\n\n"
        + " The **Ethnicity label makeup** plot (*right*) corresponds to the number of images from each of the 18 ethnicity descriptions used in the prompts. A blank value denotes unmarked ethnicity.\n\n"
    )

    # Page load and both selectors refresh the same components through the
    # same callback, so the wiring is declared once. The output order must
    # match the tuple returned by show_cluster.
    event_inputs = [cluster_id, num_clusters]
    event_outputs = [a, b, b_desc, c, c_desc, d, d_desc, gallery, cluster_id]
    for register in (demo.load, num_clusters.change, cluster_id.change):
        register(fn=show_cluster, inputs=event_inputs, outputs=event_outputs)

if __name__ == "__main__":
    demo.queue().launch(debug=True)