Improve multi-language support

This commit is contained in:
Augusto Gunsch 2022-01-23 10:37:38 -03:00
parent 0ee20773bb
commit db3bea33e6
10 changed files with 78 additions and 47 deletions

View File

@ -1,4 +1,5 @@
use std::fs;
use std::fs::File;
use std::collections::HashSet;
use std::fmt;
@ -41,20 +42,22 @@ impl WordDb {
let conn = Connection::open(&db_path).unwrap();
let mut statement = conn.prepare(
let mut installed_langs: Vec<Language> = Vec::new();
let statement = conn.prepare(
"SELECT code, name, major, minor, patch
FROM langs"
).unwrap();
);
if let Ok(mut statement) = statement {
let mut rows = statement.query([]).unwrap();
let mut installed_langs: Vec<Language> = Vec::new();
while let Some(row) = rows.next().unwrap() {
installed_langs.push(Language::from_row(&row));
}
installed_langs.sort();
}
Self {
db_path,
@ -144,7 +147,7 @@ impl WordDb {
)", &lang.code), []).unwrap();
transaction.execute(&format!("
CREATE INDEX word_index
CREATE INDEX {0}_word_index
ON {0}_words (word)
", &lang.code), []).unwrap();
@ -161,7 +164,7 @@ impl WordDb {
(SELECT id FROM {0}_types WHERE name = ?)
)", &lang.code),
params![entry.word,
entry.parsed_json.to_string(),
entry.unparsed_json,
entry.type_]
).unwrap();
}
@ -192,7 +195,7 @@ impl WordDb {
).unwrap();
for entry in entries.iter() {
if let Some(forms) = entry.parsed_json["forms"].as_array() {
if let Some(forms) = entry.parse_json()["forms"].as_array() {
let mut forms_vec: Vec<Form> = Vec::new();
for form in forms {
@ -219,7 +222,10 @@ impl WordDb {
let mut senses: Vec<Value> = Vec::new();
for form in forms {
let mut tags = form.tags.clone();
let mut tags = match &form.tags {
Some(tags) => tags.clone(),
None => Vec::new()
};
tags.push(String::from("form-of"));
tags.push(String::from("auto-generated"));
@ -230,7 +236,10 @@ impl WordDb {
}
],
"glosses": [
form.tags.join(" ")
match &form.tags {
Some(tags) => tags.join(" "),
None => String::from("")
}
],
"tags": tags
}));
@ -244,7 +253,7 @@ impl WordDb {
let new_entry = WiktionaryEntry::new(forms[0].form.clone(),
entry.type_.clone(),
entry_json);
entry_json.to_string());
self.insert_entry(&transaction, lang, &new_entry);
}
@ -293,7 +302,7 @@ impl WordDb {
println!("Trying to read cached data...");
let cache_file = format!("{}/{}.json", CACHE_DIR, &lang.name);
let cached_data = fs::read_to_string(&cache_file);
let mut cached_data = File::open(&cache_file);
let mut request = None;
if let Err(_) = cached_data {
@ -305,23 +314,20 @@ impl WordDb {
println!("Cleaning tables...");
self.clean_tables(lang)?;
let data;
if let Some(request) = request {
// Actually, the request was sent before
println!("Requesting data...");
data = request.await.unwrap().text().await.unwrap();
if cfg!(unix) {
let data = request.await.unwrap().text().await.unwrap();
println!("Caching data...");
util::try_create_dir(CACHE_DIR);
fs::write(&cache_file, &data).unwrap();
}
}
else {
data = cached_data.unwrap();
cached_data = File::open(&cache_file);
}
println!("Parsing data...");
let entries = WiktionaryEntries::parse_data(data);
let entries = WiktionaryEntries::parse_data(cached_data.unwrap());
println!("Inserting types...");
self.insert_types(lang, &entries);

View File

@ -1,3 +1,5 @@
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::cmp;
use std::slice::Iter;
use serde_json::Value;
@ -7,7 +9,7 @@ use serde::Deserialize;
pub struct WiktionaryEntry {
pub word: String,
pub type_: String,
pub parsed_json: Value,
pub unparsed_json: String
}
impl cmp::PartialEq for WiktionaryEntry {
@ -32,6 +34,8 @@ impl cmp::Ord for WiktionaryEntry {
impl WiktionaryEntry {
pub fn parse(unparsed_json: &str) -> Self {
// We could keep this in memory, but for bigger language databases
// it's going to crash the program
let json: Value = serde_json::from_str(unparsed_json).unwrap();
let word = String::from(json["word"].as_str().unwrap());
@ -40,27 +44,33 @@ impl WiktionaryEntry {
Self {
word,
type_,
parsed_json: json
unparsed_json: String::from(unparsed_json)
}
}
pub fn new(word: String, type_: String, parsed_json: Value) -> Self {
pub fn new(word: String, type_: String, unparsed_json: String) -> Self {
Self {
word,
type_,
parsed_json
unparsed_json
}
}
/// Deserializes this entry's stored raw JSON text into a `Value`.
///
/// The entry holds only the unparsed text, so each call parses it again
/// and returns a freshly built value.
///
/// # Panics
///
/// Panics if `unparsed_json` cannot be deserialized as JSON.
pub fn parse_json(&self) -> Value {
    let raw: &str = self.unparsed_json.as_str();
    let parsed: Value = serde_json::from_str(raw).unwrap();
    parsed
}
}
/// Newtype wrapper around a `Vec` of [`WiktionaryEntry`] values.
pub struct WiktionaryEntries(Vec<WiktionaryEntry>);
impl WiktionaryEntries {
pub fn parse_data(data: String) -> Self {
pub fn parse_data(data: File) -> Self {
let reader = BufReader::new(data);
let mut entries: Vec<WiktionaryEntry> = Vec::new();
for line in data.lines() {
entries.push(WiktionaryEntry::parse(line));
for line in reader.lines() {
entries.push(WiktionaryEntry::parse(&line.unwrap()));
}
Self(entries)
@ -74,7 +84,7 @@ impl WiktionaryEntries {
#[derive(Debug, Deserialize)]
pub struct Form {
pub form: String,
pub tags: Vec<String>,
pub tags: Option<Vec<String>>,
pub source: Option<String>,
}

View File

@ -8,23 +8,31 @@ $(document).ready(() => {
success: data => {
langs = data;
$('#langs').html(data.map(lang => `<option value="${lang.code}">${lang.name}</option>`));
const selectedLangCode = localStorage.selectedLangCode;
let options = '';
langs.forEach(lang => {
if(selectedLangCode && lang.code == selectedLangCode) {
options += `<option value="${lang.code}" selected>${lang.name}</option>`;
} else {
options += `<option value="${lang.code}">${lang.name}</option>`;
}
});
$('#langs').html(options);
setLang($('#langs').val());
}
});
$('#lang').on('change', e => {
console.log(e.target.value);
let langCode = e.target.value;
setLang(code);
$('#langs').on('change', e => {
setLang(e.target.value);
});
function setLang(code) {
let lang = langs.find(lang => lang.code == code);
const lang = langs.find(lang => lang.code == code);
localStorage.selectedLangCode = code;
selectedLang = lang;
$.ajax({
@ -64,13 +72,13 @@ $(document).ready(() => {
searchForm.on('submit', e => {
e.preventDefault();
let word = e.target[0].value
const word = e.target[0].value
window.location.hash = `#${word}`;
});
function getWord() {
let word = window.location.hash.replace('#', '');
const word = window.location.hash.replace('#', '');
if (word) {
document.title = `Inflective - ${decodeURIComponent(word)}`;

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
[] Italian.json

View File

@ -0,0 +1 @@
[]