Maslionok committed on
Commit
88774ef
·
1 Parent(s): 81666dd

Added a more detailed description for each parameter

Browse files
Files changed (1) hide show
  1. app.py +40 -10
app.py CHANGED
@@ -695,7 +695,10 @@ with gr.Blocks(title="Multilingual Dictionary Explorer", css=CSS) as demo:
695
  value=3,
696
  step=1,
697
  label="top_k",
698
- info="How many translations to show for each target language.",
 
 
 
699
  )
700
  min_score = gr.Slider(
701
  -2.0,
@@ -703,7 +706,10 @@ with gr.Blocks(title="Multilingual Dictionary Explorer", css=CSS) as demo:
703
  value=0.15,
704
  step=0.01,
705
  label="min_score",
706
- info="The lowest score a candidate needs before it can be shown.",
 
 
 
707
  )
708
  csls_k = gr.Slider(
709
  1,
@@ -711,7 +717,10 @@ with gr.Blocks(title="Multilingual Dictionary Explorer", css=CSS) as demo:
711
  value=10,
712
  step=1,
713
  label="csls_k",
714
- info="How many nearby words CSLS uses to correct crowded areas of the space.",
 
 
 
715
  )
716
  candidate_retrieval_k = gr.Slider(
717
  1,
@@ -719,7 +728,10 @@ with gr.Blocks(title="Multilingual Dictionary Explorer", css=CSS) as demo:
719
  value=9,
720
  step=1,
721
  label="candidate_retrieval_k",
722
- info="How many initial neighbours to inspect before filters keep the best ones.",
 
 
 
723
  )
724
  csls_prefetch_k = gr.Slider(
725
  10,
@@ -727,33 +739,51 @@ with gr.Blocks(title="Multilingual Dictionary Explorer", css=CSS) as demo:
727
  value=50,
728
  step=1,
729
  label="csls_prefetch_k",
730
- info="How many extra neighbours to fetch so CSLS scoring has enough context.",
 
 
 
731
  )
732
  score_method = gr.Radio(
733
  ["csls", "cosine"],
734
  value="csls",
735
  label="score",
736
- info="CSLS is usually better for translation; cosine is plain vector similarity.",
 
 
 
737
  )
738
  bidirectional = gr.Checkbox(
739
  value=True,
740
  label="bidirectional_consistency",
741
- info="Keep a translation only if it also points back to the query word.",
 
 
 
742
  )
743
  filter_stopwords = gr.Checkbox(
744
  value=True,
745
  label="filter stopwords",
746
- info="Hide very common words such as articles, prepositions, and pronouns.",
 
 
 
747
  )
748
  filter_bad_tokens = gr.Checkbox(
749
  value=True,
750
  label="filter bad tokens",
751
- info="Hide short, numeric, or punctuation-heavy tokens that are usually noise.",
 
 
 
752
  )
753
  use_surface = gr.Checkbox(
754
  value=True,
755
  label="show surface forms",
756
- info="Show original-looking word forms when this aligned space includes them.",
 
 
 
757
  )
758
 
759
  with gr.Column(scale=2):
 
695
  value=3,
696
  step=1,
697
  label="top_k",
698
+ info=(
699
+ "How many final translations to show per target language after "
700
+ "scoring and filters."
701
+ ),
702
  )
703
  min_score = gr.Slider(
704
  -2.0,
 
706
  value=0.15,
707
  step=0.01,
708
  label="min_score",
709
+ info=(
710
+ "The minimum translation score to show. CSLS is a relative score, "
711
+ "so negative values are valid but usually allow weaker matches."
712
+ ),
713
  )
714
  csls_k = gr.Slider(
715
  1,
 
717
  value=10,
718
  step=1,
719
  label="csls_k",
720
+ info=(
721
+ "How many neighbours CSLS compares against to avoid overrating "
722
+ "generic words in crowded vector areas."
723
+ ),
724
  )
725
  candidate_retrieval_k = gr.Slider(
726
  1,
 
728
  value=9,
729
  step=1,
730
  label="candidate_retrieval_k",
731
+ info=(
732
+ "How many top candidates to inspect before removing bad tokens, "
733
+ "stopwords, low scores, or non-bidirectional matches."
734
+ ),
735
  )
736
  csls_prefetch_k = gr.Slider(
737
  10,
 
739
  value=50,
740
  step=1,
741
  label="csls_prefetch_k",
742
+ info=(
743
+ "How many nearby candidates to fetch first so CSLS can score a "
744
+ "larger pool before the final shortlist."
745
+ ),
746
  )
747
  score_method = gr.Radio(
748
  ["csls", "cosine"],
749
  value="csls",
750
  label="score",
751
+ info=(
752
+ "CSLS adjusts cosine similarity for multilingual lookup; cosine "
753
+ "shows plain vector closeness without that correction."
754
+ ),
755
  )
756
  bidirectional = gr.Checkbox(
757
  value=True,
758
  label="bidirectional_consistency",
759
+ info=(
760
+ "Keep a translation only when the target word also retrieves the "
761
+ "query word back, which is stricter but cleaner."
762
+ ),
763
  )
764
  filter_stopwords = gr.Checkbox(
765
  value=True,
766
  label="filter stopwords",
767
+ info=(
768
+ "Remove common function words such as articles, prepositions, and "
769
+ "pronouns from the displayed candidates."
770
+ ),
771
  )
772
  filter_bad_tokens = gr.Checkbox(
773
  value=True,
774
  label="filter bad tokens",
775
+ info=(
776
+ "Remove candidates that look like noise, for example very short, "
777
+ "numeric, or punctuation-heavy tokens."
778
+ ),
779
  )
780
  use_surface = gr.Checkbox(
781
  value=True,
782
  label="show surface forms",
783
+ info=(
784
+ "Show readable surface forms while keeping the normalized token "
785
+ "visible in the token column."
786
+ ),
787
  )
788
 
789
  with gr.Column(scale=2):