Added a more detailed description of each parameter
Browse files
app.py
CHANGED
|
@@ -695,7 +695,10 @@ with gr.Blocks(title="Multilingual Dictionary Explorer", css=CSS) as demo:
|
|
| 695 |
value=3,
|
| 696 |
step=1,
|
| 697 |
label="top_k",
|
| 698 |
-
info=
|
|
|
|
|
|
|
|
|
|
| 699 |
)
|
| 700 |
min_score = gr.Slider(
|
| 701 |
-2.0,
|
|
@@ -703,7 +706,10 @@ with gr.Blocks(title="Multilingual Dictionary Explorer", css=CSS) as demo:
|
|
| 703 |
value=0.15,
|
| 704 |
step=0.01,
|
| 705 |
label="min_score",
|
| 706 |
-
info=
|
|
|
|
|
|
|
|
|
|
| 707 |
)
|
| 708 |
csls_k = gr.Slider(
|
| 709 |
1,
|
|
@@ -711,7 +717,10 @@ with gr.Blocks(title="Multilingual Dictionary Explorer", css=CSS) as demo:
|
|
| 711 |
value=10,
|
| 712 |
step=1,
|
| 713 |
label="csls_k",
|
| 714 |
-
info=
|
|
|
|
|
|
|
|
|
|
| 715 |
)
|
| 716 |
candidate_retrieval_k = gr.Slider(
|
| 717 |
1,
|
|
@@ -719,7 +728,10 @@ with gr.Blocks(title="Multilingual Dictionary Explorer", css=CSS) as demo:
|
|
| 719 |
value=9,
|
| 720 |
step=1,
|
| 721 |
label="candidate_retrieval_k",
|
| 722 |
-
info=
|
|
|
|
|
|
|
|
|
|
| 723 |
)
|
| 724 |
csls_prefetch_k = gr.Slider(
|
| 725 |
10,
|
|
@@ -727,33 +739,51 @@ with gr.Blocks(title="Multilingual Dictionary Explorer", css=CSS) as demo:
|
|
| 727 |
value=50,
|
| 728 |
step=1,
|
| 729 |
label="csls_prefetch_k",
|
| 730 |
-
info=
|
|
|
|
|
|
|
|
|
|
| 731 |
)
|
| 732 |
score_method = gr.Radio(
|
| 733 |
["csls", "cosine"],
|
| 734 |
value="csls",
|
| 735 |
label="score",
|
| 736 |
-
info=
|
|
|
|
|
|
|
|
|
|
| 737 |
)
|
| 738 |
bidirectional = gr.Checkbox(
|
| 739 |
value=True,
|
| 740 |
label="bidirectional_consistency",
|
| 741 |
-
info=
|
|
|
|
|
|
|
|
|
|
| 742 |
)
|
| 743 |
filter_stopwords = gr.Checkbox(
|
| 744 |
value=True,
|
| 745 |
label="filter stopwords",
|
| 746 |
-
info=
|
|
|
|
|
|
|
|
|
|
| 747 |
)
|
| 748 |
filter_bad_tokens = gr.Checkbox(
|
| 749 |
value=True,
|
| 750 |
label="filter bad tokens",
|
| 751 |
-
info=
|
|
|
|
|
|
|
|
|
|
| 752 |
)
|
| 753 |
use_surface = gr.Checkbox(
|
| 754 |
value=True,
|
| 755 |
label="show surface forms",
|
| 756 |
-
info=
|
|
|
|
|
|
|
|
|
|
| 757 |
)
|
| 758 |
|
| 759 |
with gr.Column(scale=2):
|
|
|
|
| 695 |
value=3,
|
| 696 |
step=1,
|
| 697 |
label="top_k",
|
| 698 |
+
info=(
|
| 699 |
+
"How many final translations to show per target language after "
|
| 700 |
+
"scoring and filters."
|
| 701 |
+
),
|
| 702 |
)
|
| 703 |
min_score = gr.Slider(
|
| 704 |
-2.0,
|
|
|
|
| 706 |
value=0.15,
|
| 707 |
step=0.01,
|
| 708 |
label="min_score",
|
| 709 |
+
info=(
|
| 710 |
+
"The minimum translation score to show. CSLS is a relative score, "
|
| 711 |
+
"so negative values are valid but usually allow weaker matches."
|
| 712 |
+
),
|
| 713 |
)
|
| 714 |
csls_k = gr.Slider(
|
| 715 |
1,
|
|
|
|
| 717 |
value=10,
|
| 718 |
step=1,
|
| 719 |
label="csls_k",
|
| 720 |
+
info=(
|
| 721 |
+
"How many neighbours CSLS compares against to avoid overrating "
|
| 722 |
+
"generic words in crowded vector areas."
|
| 723 |
+
),
|
| 724 |
)
|
| 725 |
candidate_retrieval_k = gr.Slider(
|
| 726 |
1,
|
|
|
|
| 728 |
value=9,
|
| 729 |
step=1,
|
| 730 |
label="candidate_retrieval_k",
|
| 731 |
+
info=(
|
| 732 |
+
"How many top candidates to inspect before removing bad tokens, "
|
| 733 |
+
"stopwords, low scores, or non-bidirectional matches."
|
| 734 |
+
),
|
| 735 |
)
|
| 736 |
csls_prefetch_k = gr.Slider(
|
| 737 |
10,
|
|
|
|
| 739 |
value=50,
|
| 740 |
step=1,
|
| 741 |
label="csls_prefetch_k",
|
| 742 |
+
info=(
|
| 743 |
+
"How many nearby candidates to fetch first so CSLS can score a "
|
| 744 |
+
"larger pool before the final shortlist."
|
| 745 |
+
),
|
| 746 |
)
|
| 747 |
score_method = gr.Radio(
|
| 748 |
["csls", "cosine"],
|
| 749 |
value="csls",
|
| 750 |
label="score",
|
| 751 |
+
info=(
|
| 752 |
+
"CSLS adjusts cosine similarity for multilingual lookup; cosine "
|
| 753 |
+
"shows plain vector closeness without that correction."
|
| 754 |
+
),
|
| 755 |
)
|
| 756 |
bidirectional = gr.Checkbox(
|
| 757 |
value=True,
|
| 758 |
label="bidirectional_consistency",
|
| 759 |
+
info=(
|
| 760 |
+
"Keep a translation only when the target word also retrieves the "
|
| 761 |
+
"query word back, which is stricter but cleaner."
|
| 762 |
+
),
|
| 763 |
)
|
| 764 |
filter_stopwords = gr.Checkbox(
|
| 765 |
value=True,
|
| 766 |
label="filter stopwords",
|
| 767 |
+
info=(
|
| 768 |
+
"Remove common function words such as articles, prepositions, and "
|
| 769 |
+
"pronouns from the displayed candidates."
|
| 770 |
+
),
|
| 771 |
)
|
| 772 |
filter_bad_tokens = gr.Checkbox(
|
| 773 |
value=True,
|
| 774 |
label="filter bad tokens",
|
| 775 |
+
info=(
|
| 776 |
+
"Remove candidates that look like noise, for example very short, "
|
| 777 |
+
"numeric, or punctuation-heavy tokens."
|
| 778 |
+
),
|
| 779 |
)
|
| 780 |
use_surface = gr.Checkbox(
|
| 781 |
value=True,
|
| 782 |
label="show surface forms",
|
| 783 |
+
info=(
|
| 784 |
+
"Show readable surface forms while keeping the normalized token "
|
| 785 |
+
"visible in the token column."
|
| 786 |
+
),
|
| 787 |
)
|
| 788 |
|
| 789 |
with gr.Column(scale=2):
|