Initial commit

This commit is contained in:
Augusto Gunsch
2021-12-25 19:30:14 -03:00
commit b918498930
33 changed files with 37200 additions and 0 deletions

113
src/database.rs Normal file
View File

@@ -0,0 +1,113 @@
use std::fs;
use reqwest;
use rusqlite::Connection;
use rusqlite::params;
use crate::language::Language;
use crate::entry::WiktionaryEntries;
/// A database of Wiktionary entries
pub struct WordDb {
// Filesystem path of the SQLite database file; a fresh `Connection`
// is opened from it on every call to `connect`.
connection: String
}
impl WordDb {
/// Builds a database handle for the SQLite file at `db_path`.
///
/// Only the path is stored here; no connection is opened until
/// `connect` is called.
pub fn new(db_path: &str) -> Self {
    Self { connection: db_path.to_string() }
}
/// Opens a new SQLite connection to the configured database file.
///
/// # Panics
/// Panics if the database file cannot be opened (e.g. bad path or
/// permissions) — `expect` gives a clearer diagnostic than a bare unwrap.
pub fn connect(&self) -> Connection {
    Connection::open(&self.connection)
        .expect("failed to open SQLite database file")
}
/// Drops and recreates this language's tables (`{code}_types` and
/// `{code}_words`) inside a single transaction, re-inserting the
/// language's word types.
pub fn clean_tables(&mut self, lang: &Language) {
let mut connection = self.connect();
let transaction = connection.transaction().unwrap();
// NOTE(review): `lang.code` is spliced into SQL identifiers; it is
// assumed to be a trusted internal code, not user input — confirm at
// call sites.
transaction.execute(&format!("DROP TABLE IF EXISTS {}_words", &lang.code), []).unwrap();
transaction.execute(&format!("DROP TABLE IF EXISTS {}_types", &lang.code), []).unwrap();
// Lookup table of word types (parts of speech).
transaction.execute(&format!("
CREATE TABLE {}_types (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
name TINYTEXT UNIQUE NOT NULL
)", &lang.code), []).unwrap();
// Seed the lookup table with the language's known types.
for type_ in &lang.types {
transaction.execute(&format!("
INSERT INTO {}_types ( name )
VALUES (
?
)", &lang.code), [type_]).unwrap();
}
// Main words table; `type_id` points into the types table above.
transaction.execute(&format!("
CREATE TABLE {}_words (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
word TINYTEXT UNIQUE NOT NULL,
type_id INTEGER NOT NULL,
content MEDIUMTEXT NOT NULL,
FOREIGN KEY (type_id)
REFERENCES {}_types (id)
)", &lang.code, &lang.code), []).unwrap();
transaction.commit().unwrap();
}
/// Inserts all `entries` into `{code}_words` inside one transaction.
///
/// The INSERT statement is prepared once and reused for every entry,
/// instead of being re-parsed by SQLite on each iteration.
pub fn insert_entries(&mut self, lang: &Language, entries: WiktionaryEntries) {
    let mut connection = self.connect();
    let transaction = connection.transaction().unwrap();
    {
        // Prepare once; executing a cached statement avoids re-parsing
        // the SQL text for every row.
        let mut statement = transaction.prepare(&format!("
            INSERT INTO {}_words ( word, content, type_id )
            VALUES (
                ?, ?,
                (SELECT id FROM {}_types WHERE name = ?)
            )", &lang.code, &lang.code)).unwrap();
        for entry in entries {
            statement.execute(params![entry.word,
                                      entry.parsed_json.to_string(),
                                      entry.type_]).unwrap();
        }
        // `statement` borrows `transaction`, so it must be dropped
        // (end of this scope) before the commit below.
    }
    transaction.commit().unwrap();
}
/// Downloads (or reads from a local cache file) the kaikki.org
/// dictionary dump for `lang` and rebuilds its tables from it.
///
/// The dump name and URL are derived from `lang.code` (first letter
/// upper-cased, matching kaikki.org's naming) instead of being
/// hard-coded to Polish, so other languages can be upgraded too.
///
/// # Panics
/// Panics if the download, the cache write, or any database step fails.
pub async fn upgrade_lang(&mut self, lang: &Language) {
    // kaikki.org capitalizes the language name: "polish" -> "Polish".
    let mut dict_name = lang.code.clone();
    if let Some(first) = dict_name.get_mut(0..1) {
        first.make_ascii_uppercase();
    }
    let cache_file = format!("{}.json", dict_name);

    println!("Trying to read cached data...");
    let cached_data = fs::read_to_string(&cache_file);

    // Start the request before cleaning tables so the download runs
    // concurrently with the (synchronous) table setup below.
    let request = if cached_data.is_err() {
        Some(reqwest::get(format!(
            "https://kaikki.org/dictionary/{0}/kaikki.org-dictionary-{0}.json",
            dict_name
        )))
    } else {
        None
    };

    println!("Cleaning tables...");
    self.clean_tables(lang);

    let data = match request {
        Some(request) => {
            // The request was actually fired earlier; we only await it here.
            println!("Requesting data...");
            let data = request.await.unwrap().text().await.unwrap();
            println!("Caching data...");
            fs::write(&cache_file, &data).unwrap();
            data
        }
        None => cached_data.unwrap(),
    };

    println!("Parsing data...");
    let entries = WiktionaryEntries::parse_data(data)
        .merge_duplicates();

    println!("Inserting data...");
    self.insert_entries(lang, entries);

    println!("Done");
}
}

136
src/entry.rs Normal file
View File

@@ -0,0 +1,136 @@
use std::cmp;
use std::iter::IntoIterator;
use json::JsonValue::{Object, Short, Array};
use json::JsonValue;
#[derive (Clone)]
#[derive (Debug)]
/// One Wiktionary entry: a headword plus its parsed JSON payload.
pub struct WiktionaryEntry {
// The headword itself.
pub word: String,
// Part of speech, taken from the JSON "pos" field.
pub type_: String,
// The parsed JSON object for this entry (becomes an array of
// objects after duplicates are merged).
pub parsed_json: JsonValue
}
// Equality and ordering deliberately consider only `word`, so entries
// for the same headword compare equal and sort adjacently — this is
// what `WiktionaryEntries::merge_duplicates` relies on.
impl cmp::PartialEq for WiktionaryEntry {
fn eq(&self, other: &Self) -> bool {
self.word.eq(&other.word)
}
}
impl cmp::Eq for WiktionaryEntry {}
impl cmp::PartialOrd for WiktionaryEntry {
fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
// Delegate to the total order below (idiomatic for Ord types).
Some(self.cmp(other))
}
}
impl cmp::Ord for WiktionaryEntry {
fn cmp(&self, other: &Self) -> cmp::Ordering {
self.word.cmp(&other.word)
}
}
impl WiktionaryEntry {
/// Combines two entries for the same word into a single entry whose
/// JSON payload is an array holding both payloads.
fn merge(first: Self, second: Self) -> Self {
    let parsed_json = match first.parsed_json {
        // Already an array from an earlier merge: append the new payload.
        Array(mut items) => {
            items.push(second.parsed_json);
            JsonValue::Array(items)
        }
        // Two plain objects: start a fresh two-element array.
        Object(_) => JsonValue::Array(vec![first.parsed_json, second.parsed_json]),
        _ => panic!("Expected array or object, found {}", first.parsed_json.pretty(8)),
    };
    Self {
        word: first.word,
        type_: first.type_,
        parsed_json,
    }
}
pub fn parse(unparsed_json: &str) -> Self {
let json = json::parse(unparsed_json).unwrap();
let (word, type_) = match &json {
Object(o) => (
match o.get("word") {
Some(w) => match w {
Short(s) => s.to_string(),
JsonValue::String(s) => s.clone(),
_ => panic!("Not a string: {}", w.pretty(8))
},
None => panic!("No field 'word': {}", o.pretty(8))
},
match o.get("pos") {
Some(w) => match w {
Short(s) => s.to_string(),
JsonValue::String(s) => s.clone(),
_ => panic!("Not a string: {}", w.pretty(8))
},
None => panic!("No field 'pos': {}", o.pretty(8))
}
),
_ => panic!("Not an object: {}", json.pretty(8))
};
Self {
word,
type_,
parsed_json: json
}
}
}
/// A collection of parsed Wiktionary entries (newtype over `Vec`).
pub struct WiktionaryEntries(Vec<WiktionaryEntry>);
impl WiktionaryEntries {
/// Parses a whole kaikki.org dump: one JSON entry per line.
pub fn parse_data(data: String) -> Self {
    Self(data.lines().map(WiktionaryEntry::parse).collect())
}
/// Sorts the entries by word and merges consecutive entries sharing a
/// word into one entry (see `WiktionaryEntry::merge`).
///
/// An empty collection is returned unchanged rather than panicking on
/// the missing first element.
pub fn merge_duplicates(mut self) -> Self {
    self.0.sort();
    let mut entries = self.0.into_iter();
    let mut new_entries = Vec::new();
    // Guard instead of `.next().unwrap()`: an empty dump must not panic.
    if let Some(mut last_entry) = entries.next() {
        for entry in entries {
            if last_entry == entry {
                // Same word (equality compares `word` only): fold together.
                last_entry = WiktionaryEntry::merge(last_entry, entry);
            } else {
                new_entries.push(last_entry);
                last_entry = entry;
            }
        }
        new_entries.push(last_entry);
    }
    self.0 = new_entries;
    self
}
}
// Allows consuming iteration (`for entry in entries`), as used by
// database insertion.
impl IntoIterator for WiktionaryEntries {
type Item = WiktionaryEntry;
type IntoIter = std::vec::IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}

14
src/language.rs Normal file
View File

@@ -0,0 +1,14 @@
/// A language supported by the word database.
#[derive(Debug)]
pub struct Language {
    /// Short language code used as the table-name prefix (e.g. "polish").
    pub code: String,
    /// Word types (parts of speech) this language's entries may carry.
    pub types: Vec<String>,
}

impl Language {
    /// Creates a new language description.
    ///
    /// `code` accepts anything convertible into a `String`, so both
    /// `&str` and `String` callers work (backward compatible).
    pub fn new(code: impl Into<String>, types: Vec<String>) -> Self {
        Self {
            code: code.into(),
            types,
        }
    }
}

94
src/main.rs Normal file
View File

@@ -0,0 +1,94 @@
//mod database;
use rocket::routes;
use rocket::fs::FileServer;
use rocket::data::{Limits, ToByteUnit};
use clap::{App, AppSettings, Arg, SubCommand};
//use database::WordDb;
mod database;
mod language;
mod entry;
mod routes;
use database::WordDb;
use language::Language;
#[rocket::main]
async fn main() {
    // Command-line interface: upgrade / run / list / passwd subcommands.
    let matches = App::new("inflectived")
        .version("0.1")
        .author("Augusto Gunsch <augustogunsch@tutanota.com>")
        .about("inflective daemon")
        .subcommands(vec![
            SubCommand::with_name("upgrade")
                .about("Upgrade or install a language database")
                .arg(
                    Arg::with_name("LANG")
                        .required(true)
                        .index(1)
                        .help("Language database to upgrade"),
                ),
            SubCommand::with_name("run").about("Run the daemon").arg(
                Arg::with_name("port")
                    .short("p")
                    .long("port")
                    .value_name("PORT")
                    .help("Port to run the server on")
                    .takes_value(true),
            ),
            SubCommand::with_name("list")
                .about("List language databases")
                .arg(
                    Arg::with_name("installed")
                        .short("i")
                        .long("installed")
                        .help("List only installed databases"),
                ),
            SubCommand::with_name("passwd").about("Set admin password for remote management"),
        ])
        .setting(AppSettings::SubcommandRequiredElseHelp)
        .get_matches();

    let mut db = WordDb::new("test.db");

    // Word types recognized for Polish, built from a literal array
    // instead of 22 repeated String::from calls.
    let types: Vec<String> = [
        "adj", "noun", "verb", "character", "suffix", "prefix", "conj",
        "adv", "infix", "name", "phrase", "prep_phrase", "intj", "det",
        "prep", "proverb", "abbrev", "num", "pron", "punct", "interfix",
        "particle",
    ]
    .iter()
    .map(|s| String::from(*s))
    .collect();
    let lang = Language::new("polish", types);

    match matches.subcommand() {
        // NOTE(review): the required LANG argument and the run
        // subcommand's --port option are currently ignored — only the
        // hard-coded Polish language and default port are used.
        // TODO: wire them up.
        ("upgrade", _) => { db.upgrade_lang(&lang).await; },
        ("run", _) => {
            // Listen on all interfaces, not just localhost.
            let figment = rocket::Config::figment()
                .merge(("address", "0.0.0.0"));
            rocket::custom(figment)
                .manage(db)
                .mount("/static", FileServer::from("static/"))
                .mount("/", routes![routes::get_word,
                                    routes::get_word_like,
                                    routes::frontend])
                .launch()
                .await.unwrap();
        },
        _ => {}
    }
}

59
src/routes.rs Normal file
View File

@@ -0,0 +1,59 @@
use std::fs;
use rocket::get;
use rocket::State;
use rocket::http::Status;
use rocket::response::{content, status};
use rocket::serde::json::Json;
use rusqlite::params;
use crate::database::WordDb;
/// Serves the static frontend page, or 404s when the file is missing.
#[get("/frontend")]
pub fn frontend() -> Option<content::Html<String>> {
    fs::read_to_string("static/index.html")
        .ok()
        .map(content::Html)
}
/// Looks up one word in the given language's table.
///
/// Returns `None` (404) when the word is not found, the language table
/// does not exist, or `lang` contains unsafe characters.
#[get("/langs/<lang>/words/<word>")]
pub fn get_word(db: &State<WordDb>, lang: &str, word: &str) -> Option<content::Json<String>> {
    // `lang` is user-controlled and is interpolated into the table name
    // (SQL identifiers cannot be bound as parameters), so restrict it to
    // a safe character set to block SQL injection.
    if !lang.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
        return None;
    }
    let connection = db.connect();
    let word = connection.query_row(&format!(
        "SELECT content FROM {}_words WHERE word = ?",
        lang),
        [word],
        |row| row.get(0)
    );
    // Any error (missing word, missing table) maps to a 404.
    word.ok().map(content::Json)
}
/// Returns up to `limit` words containing `like` (shortest first),
/// skipping `offset` rows — used for autocomplete-style search.
///
/// # Panics
/// Panics if the language table does not exist (prepare/query unwrap).
#[get("/langs/<lang>/words?<like>&<limit>&<offset>")]
pub fn get_word_like(db: &State<WordDb>, lang: &str, like: &str, limit: usize, offset: usize) -> Json<Vec<String>> {
    // `lang` is user-controlled and interpolated into the table name;
    // reject anything outside a safe identifier charset to block SQL
    // injection, returning an empty result set.
    if !lang.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
        return Json(Vec::new());
    }
    let connection = db.connect();
    let mut statement = connection.prepare(&format!(
        "SELECT word
        FROM {}_words
        WHERE word LIKE ?
        ORDER BY length(word) ASC
        LIMIT ?
        OFFSET ?",
        lang)
    ).unwrap();
    // Substring match: %like%.
    let mut rows = statement.query(params![format!("%{}%", like), limit, offset]).unwrap();
    let mut words = Vec::new();
    while let Some(row) = rows.next().unwrap() {
        words.push(row.get(0).unwrap());
    }
    Json(words)
}