Spaces:

impresso-project
/

multilingual-static-word-embeddings-demo

Sleeping

App Files Files Community

Maslionok committed 12 days ago

Commit

88774ef

1 Parent(s): 81666dd

added a bit more description about each parameter

Browse files

Files changed (1) hide show

app.py +40 -10

app.py CHANGED Viewed

@@ -695,7 +695,10 @@ with gr.Blocks(title="Multilingual Dictionary Explorer", css=CSS) as demo:
                     value=3,
                     step=1,
                     label="top_k",
-                    info="How many translations to show for each target language.",
                 )
                 min_score = gr.Slider(
                     -2.0,
@@ -703,7 +706,10 @@ with gr.Blocks(title="Multilingual Dictionary Explorer", css=CSS) as demo:
                     value=0.15,
                     step=0.01,
                     label="min_score",
-                    info="The lowest score a candidate needs before it can be shown.",
                 )
                 csls_k = gr.Slider(
                     1,
@@ -711,7 +717,10 @@ with gr.Blocks(title="Multilingual Dictionary Explorer", css=CSS) as demo:
                     value=10,
                     step=1,
                     label="csls_k",
-                    info="How many nearby words CSLS uses to correct crowded areas of the space.",
                 )
                 candidate_retrieval_k = gr.Slider(
                     1,
@@ -719,7 +728,10 @@ with gr.Blocks(title="Multilingual Dictionary Explorer", css=CSS) as demo:
                     value=9,
                     step=1,
                     label="candidate_retrieval_k",
-                    info="How many initial neighbours to inspect before filters keep the best ones.",
                 )
                 csls_prefetch_k = gr.Slider(
                     10,
@@ -727,33 +739,51 @@ with gr.Blocks(title="Multilingual Dictionary Explorer", css=CSS) as demo:
                     value=50,
                     step=1,
                     label="csls_prefetch_k",
-                    info="How many extra neighbours to fetch so CSLS scoring has enough context.",
                 )
                 score_method = gr.Radio(
                     ["csls", "cosine"],
                     value="csls",
                     label="score",
-                    info="CSLS is usually better for translation; cosine is plain vector similarity.",
                 )
                 bidirectional = gr.Checkbox(
                     value=True,
                     label="bidirectional_consistency",
-                    info="Keep a translation only if it also points back to the query word.",
                 )
                 filter_stopwords = gr.Checkbox(
                     value=True,
                     label="filter stopwords",
-                    info="Hide very common words such as articles, prepositions, and pronouns.",
                 )
                 filter_bad_tokens = gr.Checkbox(
                     value=True,
                     label="filter bad tokens",
-                    info="Hide short, numeric, or punctuation-heavy tokens that are usually noise.",
                 )
                 use_surface = gr.Checkbox(
                     value=True,
                     label="show surface forms",
-                    info="Show original-looking word forms when this aligned space includes them.",
                 )
         with gr.Column(scale=2):

                     value=3,
                     step=1,
                     label="top_k",
+                    info=(
+                        "How many final translations to show per target language after "
+                        "scoring and filters."
+                    ),
                 )
                 min_score = gr.Slider(
                     -2.0,
                     value=0.15,
                     step=0.01,
                     label="min_score",
+                    info=(
+                        "The minimum translation score to show. CSLS is a relative score, "
+                        "so negative values are valid but usually allow weaker matches."
+                    ),
                 )
                 csls_k = gr.Slider(
                     1,
                     value=10,
                     step=1,
                     label="csls_k",
+                    info=(
+                        "How many neighbours CSLS compares against to avoid overrating "
+                        "generic words in crowded vector areas."
+                    ),
                 )
                 candidate_retrieval_k = gr.Slider(
                     1,
                     value=9,
                     step=1,
                     label="candidate_retrieval_k",
+                    info=(
+                        "How many top candidates to inspect before removing bad tokens, "
+                        "stopwords, low scores, or non-bidirectional matches."
+                    ),
                 )
                 csls_prefetch_k = gr.Slider(
                     10,
                     value=50,
                     step=1,
                     label="csls_prefetch_k",
+                    info=(
+                        "How many nearby candidates to fetch first so CSLS can score a "
+                        "larger pool before the final shortlist."
+                    ),
                 )
                 score_method = gr.Radio(
                     ["csls", "cosine"],
                     value="csls",
                     label="score",
+                    info=(
+                        "CSLS adjusts cosine similarity for multilingual lookup; cosine "
+                        "shows plain vector closeness without that correction."
+                    ),
                 )
                 bidirectional = gr.Checkbox(
                     value=True,
                     label="bidirectional_consistency",
+                    info=(
+                        "Keep a translation only when the target word also retrieves the "
+                        "query word back, which is stricter but cleaner."
+                    ),
                 )
                 filter_stopwords = gr.Checkbox(
                     value=True,
                     label="filter stopwords",
+                    info=(
+                        "Remove common function words such as articles, prepositions, and "
+                        "pronouns from the displayed candidates."
+                    ),
                 )
                 filter_bad_tokens = gr.Checkbox(
                     value=True,
                     label="filter bad tokens",
+                    info=(
+                        "Remove candidates that look like noise, for example very short, "
+                        "numeric, or punctuation-heavy tokens."
+                    ),
                 )
                 use_surface = gr.Checkbox(
                     value=True,
                     label="show surface forms",
+                    info=(
+                        "Show readable surface forms while keeping the normalized token "
+                        "visible in the token column."
+                    ),
                 )
         with gr.Column(scale=2):