init commit

main
Oystein Kristoffer Tveit 2022-06-20 20:06:07 +02:00
commit 5cf0b95d8b
21 changed files with 3043 additions and 0 deletions

BIN
.github/icon.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

13
.gitignore vendored Normal file
View File

@ -0,0 +1,13 @@
# Temporary storage for data ingestion files
/data
# Files and directories created by pub.
.dart_tool/
.packages
# Conventional directory for build output.
/build/
main.db
# Nix
/result

8
.sqlfluff Normal file
View File

@ -0,0 +1,8 @@
[sqlfluff]
dialect = sqlite
exclude_rules = L003
[sqlfluff:rules]
tab_space_size = 2
max_line_length = 80
indent_unit = space

5
README.md Normal file
View File

@ -0,0 +1,5 @@
# ja_db
[![built with nix](https://builtwithnix.org/badge.svg)](https://builtwithnix.org)

0
bin/common.dart Normal file
View File

16
bin/ja_db.dart Normal file
View File

@ -0,0 +1,16 @@
import 'dart:io';
import 'package:sqflite_common_ffi/sqflite_ffi.dart';
import 'jmdict/parser.dart';
import 'kanjidic/parser.dart';
import 'radkfile/parser.dart';
/// Builds `main.db` in the current working directory from the JMdict,
/// RADKFILE and KANJIDIC2 data files.
///
/// [arguments] is currently unused.
Future<void> main(List<String> arguments) async {
  // `Uri.path` is percent-encoded (a space becomes `%20`) and is not a native
  // path on Windows; `toFilePath()` yields the real file-system path.
  final dbPath = Directory.current.uri.resolve('main.db').toFilePath();
  final db = await databaseFactoryFfi.openDatabase(dbPath);
  await addDataFromJMdict(db);
  await addDataFromRADKFILE(db);
  await addDataFromKANJIDIC(db);
}

235
bin/jmdict/objects.dart Normal file
View File

@ -0,0 +1,235 @@
import '../common.dart';
import '../objects.dart';
/// Names of the database tables the JMdict importer writes to.
class TableNames {
  // Entry-level tables.
  static const entry = 'JMdict_Entry';
  static const entryByKana = 'JMdict_EntryByKana';
  static const entryByEnglish = 'JMdict_EntryByEnglish';

  // Kanji and reading elements.
  static const kanjiElement = 'JMdict_KanjiElement';
  static const kanjiInfo = 'JMdict_KanjiElementInfo';
  static const readingElement = 'JMdict_ReadingElement';
  static const readingInfo = 'JMdict_ReadingElementInfo';
  static const readingRestriction = 'JMdict_ReadingElementRestriction';

  // Senses and their attributes.
  static const sense = 'JMdict_Sense';
  static const senseAntonyms = 'JMdict_SenseAntonym';
  static const senseDialect = 'JMdict_SenseDialect';
  static const senseField = 'JMdict_SenseField';
  static const senseGlossary = 'JMdict_SenseGlossary';
  static const senseInfo = 'JMdict_SenseInfo';
  static const senseLanguageSource = 'JMdict_SenseLanguageSource';
  static const senseMisc = 'JMdict_SenseMisc';
  static const sensePOS = 'JMdict_SensePOS';
  static const senseRestrictedToKanji = 'JMdict_SenseRestrictedToKanji';
  static const senseRestrictedToReading = 'JMdict_SenseRestrictedToReading';
  static const senseSeeAlso = 'JMdict_SenseSeeAlso';
}
/// Fields shared by the kanji and reading elements of a JMdict entry: the
/// written form plus its optional priority rankings.
abstract class Element extends SQLWritable {
  /// The text of the element (a kanji spelling or a kana reading).
  final String reading;

  // Priority rankings; null when the element carries no tag of that kind.
  final int? news;
  final int? ichi;
  final int? spec;
  final int? gai;
  final int? nf;

  const Element({
    required this.reading,
    this.news,
    this.ichi,
    this.spec,
    this.gai,
    this.nf,
  });

  /// Column values for this element. A new map is built on every access, so
  /// callers may safely add keys to the result.
  @override
  Map<String, Object?> get sqlValue => {
        'reading': reading,
        'news': news,
        'ichi': ichi,
        'spec': spec,
        'gai': gai,
        'nf': nf,
      };
}
/// A kanji spelling of an entry, together with its info tags.
class KanjiElement extends Element {
  /// Info tags attached to this spelling.
  List<String> info;

  KanjiElement({
    required String reading,
    int? news,
    int? ichi,
    int? spec,
    int? gai,
    int? nf,
    this.info = const [],
  }) : super(
          reading: reading,
          news: news,
          ichi: ichi,
          spec: spec,
          gai: gai,
          nf: nf,
        );
}
/// A kana reading of an entry, with its info tags and the kanji spellings it
/// is restricted to.
class ReadingElement extends Element {
  /// Info tags attached to this reading.
  List<String> info;

  /// Kanji spellings this reading is limited to; empty means unrestricted.
  List<String> restrictions;

  ReadingElement({
    required String reading,
    int? news,
    int? ichi,
    int? spec,
    int? gai,
    int? nf,
    this.info = const [],
    this.restrictions = const [],
  }) : super(
          reading: reading,
          news: news,
          ichi: ichi,
          spec: spec,
          gai: gai,
          nf: nf,
        );
}
/// The foreign-language origin of a sense (loanword information).
class LanguageSource extends SQLWritable {
  /// Language code of the source word.
  final String language;

  /// The source word or phrase, if given.
  final String? phrase;

  /// Whether the source phrase covers the whole sense rather than part of it.
  final bool fullyDescribesSense;

  /// Whether the Japanese word was assembled from smaller source-language
  /// words.
  final bool constructedFromSmallerWords;

  const LanguageSource({
    required this.language,
    this.phrase,
    this.fullyDescribesSense = true,
    this.constructedFromSmallerWords = false,
  });

  /// Column values for this language source.
  ///
  /// Booleans are written as 0/1 because sqflite only accepts `num`, `String`
  /// and `Uint8List` values; `bool` is not a supported SQLite type.
  @override
  Map<String, Object?> get sqlValue => {
        'language': language,
        'phrase': phrase,
        'fullyDescribesSense': fullyDescribesSense ? 1 : 0,
        'constructedFromSmallerWords': constructedFromSmallerWords ? 1 : 0,
      };
}
/// A single translation (gloss) of a sense.
class Glossary extends SQLWritable {
  /// Language code of the gloss.
  final String language;

  /// The gloss text.
  final String phrase;

  /// Optional gloss type, taken from the `g_type` attribute.
  final String? type;

  const Glossary({
    required this.language,
    required this.phrase,
    this.type,
  });

  /// Column values for this gloss. A new map is built on every access.
  @override
  Map<String, Object?> get sqlValue => {
        'language': language,
        'phrase': phrase,
        'type': type,
      };
}
/// Matches strings made up solely of katakana, hiragana and the long-vowel
/// mark.
final kanaRegex =
    RegExp(r'^[\p{Script=Katakana}\p{Script=Hiragana}ー]+$', unicode: true);

/// The components of a JMdict cross-reference: a kanji form and/or a reading,
/// optionally followed by a sense number.
class XRefParts {
  final String? kanjiRef;
  final String? readingRef;
  final int? senseNum;

  /// At least one of [kanjiRef] and [readingRef] must be given.
  const XRefParts({
    this.kanjiRef,
    this.readingRef,
    this.senseNum,
  }) : assert(kanjiRef != null || readingRef != null);

  /// Parses a cross-reference string whose parts are separated by `・`.
  ///
  /// Accepted shapes are `word`, `word・sense`, `kanji・reading` and
  /// `kanji・reading・sense`. A lone `word` is treated as a reading when it
  /// is pure kana and as a kanji form otherwise. Any other shape (more than
  /// three parts, or a non-numeric third part) is assumed to be a single word
  /// that itself contains `・` and is kept whole.
  factory XRefParts.fromString(String s) {
    // Builds a one-word reference, choosing the field from the script used.
    XRefParts single(String word, [int? sense]) => kanaRegex.hasMatch(word)
        ? XRefParts(readingRef: word, senseNum: sense)
        : XRefParts(kanjiRef: word, senseNum: sense);

    // Splitting on the empty string would yield individual code units, so
    // the parts must be separated on the middle dot.
    final parts = s.split('・');
    if (parts.length == 1) {
      return single(parts[0]);
    }
    if (parts.length == 2) {
      final sense = int.tryParse(parts[1]);
      if (sense != null) {
        return single(parts[0], sense);
      }
      return XRefParts(kanjiRef: parts[0], readingRef: parts[1]);
    }
    if (parts.length == 3) {
      final sense = int.tryParse(parts[2]);
      if (sense != null) {
        return XRefParts(
          kanjiRef: parts[0],
          readingRef: parts[1],
          senseNum: sense,
        );
      }
    }
    // Never return a reference with both fields null: that would break the
    // constructor's invariant and every later lookup.
    return single(s);
  }
}
/// A resolved cross-reference: the id of the target entry plus a reading.
// NOTE(review): nothing in the visible code constructs this class — confirm
// whether it is still needed.
class XRef {
  /// Identifier of the referenced entry.
  final String entryId;

  /// Reading associated with the reference.
  final String reading;

  const XRef({
    required this.entryId,
    required this.reading,
  });
}
/// One sense (meaning) of an entry together with all of its attributes.
class Sense extends SQLWritable {
  /// Identifier of the sense.
  final int id;

  // Cross-references to other entries.
  final List<XRefParts> antonyms;
  final List<XRefParts> seeAlso;

  // Tag lists.
  final List<String> dialects;
  final List<String> fields;
  final List<String> info;
  final List<String> misc;
  final List<String> pos;

  // Structured attributes.
  final List<LanguageSource> languageSource;
  final List<Glossary> glossary;

  // Kanji forms / readings this sense is limited to.
  final List<String> restrictedToKanji;
  final List<String> restrictedToReading;

  const Sense({
    required this.id,
    this.antonyms = const [],
    this.dialects = const [],
    this.fields = const [],
    this.info = const [],
    this.languageSource = const [],
    this.glossary = const [],
    this.misc = const [],
    this.pos = const [],
    this.restrictedToKanji = const [],
    this.restrictedToReading = const [],
    this.seeAlso = const [],
  });

  /// A sense has no columns of its own here; the returned map is empty and
  /// freshly created so callers can add their own keys.
  @override
  Map<String, Object?> get sqlValue {
    return {};
  }
}
/// A JMdict entry: its id plus its kanji elements, readings and senses.
class Entry extends SQLWritable {
  /// Entry identifier.
  final int id;

  final List<KanjiElement> kanji;
  final List<ReadingElement> readings;
  final List<Sense> senses;

  const Entry({
    required this.id,
    required this.kanji,
    required this.readings,
    required this.senses,
  });

  /// Column values for the entry row itself (only the id).
  @override
  Map<String, Object?> get sqlValue => {'id': id};
}

346
bin/jmdict/parser.dart Normal file
View File

@ -0,0 +1,346 @@
import 'dart:collection';
import 'dart:io';
import 'package:sqflite_common/sqlite_api.dart';
import 'package:xml/xml.dart';
import '../romaji_transliteration.dart';
import 'objects.dart';
/// Reads the `<prefix>_pri` children of [e] and returns their numeric
/// suffixes as `[news, ichi, spec, gai, nf]`.
///
/// A slot is null when no tag with that prefix is present. Throws a
/// [FormatException] when a recognised tag has a non-numeric suffix.
List<int?> getPriNums(XmlElement e, String prefix) {
  // Checked in this order; the first matching prefix wins.
  const kinds = ['news', 'ichi', 'spec', 'gai', 'nf'];
  final ranks = List<int?>.filled(kinds.length, null, growable: true);

  for (final tag in e.findElements('${prefix}_pri')) {
    final text = tag.innerText;
    final slot = kinds.indexWhere((kind) => text.startsWith(kind));
    if (slot >= 0) {
      ranks[slot] = int.parse(text.substring(kinds[slot].length));
    }
  }
  return ranks;
}
/// Converts the children of the JMdict [root] element into [Entry] objects.
///
/// Sense ids are assigned sequentially across the whole file, starting at 1.
/// Throws if an entry lacks `ent_seq`, or an element lacks its `keb`/`reb`.
List<Entry> transformXML(XmlElement root) {
  String text(XmlElement e) => e.innerText;

  // Drops the first and last character of the element text. Presumably the
  // text is an unexpanded `&name;` entity reference — TODO confirm.
  String tagName(XmlElement e) =>
      e.innerText.substring(1, e.innerText.length - 1);

  final List<Entry> entries = [];
  int senseId = 0;
  for (final entry in root.childElements) {
    final entryId = int.parse(entry.findElements('ent_seq').first.innerText);
    final List<KanjiElement> kanjiEls = [];
    final List<ReadingElement> readingEls = [];
    final List<Sense> senses = [];

    for (final k_ele in entry.findAllElements('k_ele')) {
      final ke_pri = getPriNums(k_ele, 'ke');
      kanjiEls.add(
        KanjiElement(
          // NOTE(review): unlike re_inf below, ke_inf is stored without
          // stripping the surrounding characters — confirm this is intended.
          info: k_ele.findElements('ke_inf').map(text).toList(),
          reading: k_ele.findElements('keb').first.innerText,
          news: ke_pri[0],
          ichi: ke_pri[1],
          spec: ke_pri[2],
          gai: ke_pri[3],
          nf: ke_pri[4],
        ),
      );
    }

    for (final r_ele in entry.findAllElements('r_ele')) {
      final re_pri = getPriNums(r_ele, 're');
      readingEls.add(
        ReadingElement(
          info: r_ele.findElements('re_inf').map(tagName).toList(),
          restrictions: r_ele.findElements('re_restr').map(text).toList(),
          reading: r_ele.findElements('reb').first.innerText,
          news: re_pri[0],
          ichi: re_pri[1],
          spec: re_pri[2],
          gai: re_pri[3],
          nf: re_pri[4],
        ),
      );
    }

    for (final sense in entry.findAllElements('sense')) {
      senseId++;
      senses.add(
        Sense(
          id: senseId,
          restrictedToKanji: sense.findElements('stagk').map(text).toList(),
          restrictedToReading: sense.findElements('stagr').map(text).toList(),
          pos: sense.findElements('pos').map(tagName).toList(),
          // <field> was previously never read, leaving Sense.fields empty.
          fields: sense.findElements('field').map(tagName).toList(),
          misc: sense.findElements('misc').map(tagName).toList(),
          dialects: sense.findElements('dial').map(tagName).toList(),
          info: sense.findElements('s_inf').map(text).toList(),
          languageSource: sense
              .findElements('lsource')
              .map(
                (e) => LanguageSource(
                  language: e.getAttribute('xml:lang') ?? 'eng',
                  // The source word is the element text; it may be absent.
                  phrase: e.innerText.isEmpty ? null : e.innerText,
                  // ls_type="part" marks a partial description, so the
                  // source fully describes the sense unless it is present.
                  fullyDescribesSense: e.getAttribute('ls_type') != 'part',
                  constructedFromSmallerWords:
                      e.getAttribute('ls_wasei') == 'y',
                ),
              )
              .toList(),
          glossary: sense
              .findElements('gloss')
              .map(
                (e) => Glossary(
                  language: e.getAttribute('xml:lang') ?? 'eng',
                  phrase: e.innerText,
                  type: e.getAttribute('g_type'),
                ),
              )
              .toList(),
          antonyms: sense
              .findElements('ant')
              .map((e) => XRefParts.fromString(e.innerText))
              .toList(),
          seeAlso: sense
              .findElements('xref')
              .map((e) => XRefParts.fromString(e.innerText))
              .toList(),
        ),
      );
    }

    entries.add(
      Entry(
        id: entryId,
        kanji: kanjiEls,
        readings: readingEls,
        senses: senses,
      ),
    );
  }
  return entries;
}
/// Indexes [entries] by the text of every element returned by [elementsOf].
SplayTreeMap<String, Set<Entry>> _indexEntries(
  List<Entry> entries,
  Iterable<Element> Function(Entry) elementsOf,
) {
  final index = SplayTreeMap<String, Set<Entry>>();
  for (final entry in entries) {
    for (final element in elementsOf(entry)) {
      (index[element.reading] ??= <Entry>{}).add(entry);
    }
  }
  return index;
}

/// Returns the entries [xref] refers to, honouring its sense number.
///
/// When both a kanji form and a reading are given, an entry must contain
/// both. References that match nothing resolve to an empty result.
Iterable<Entry> _resolveXRef(
  XRefParts xref,
  Map<String, Set<Entry>> byKanji,
  Map<String, Set<Entry>> byReading,
) {
  final kanjiRef = xref.kanjiRef;
  final readingRef = xref.readingRef;
  final kanjiHits =
      kanjiRef == null ? null : (byKanji[kanjiRef] ?? <Entry>{});
  final readingHits =
      readingRef == null ? null : (byReading[readingRef] ?? <Entry>{});

  final Set<Entry> hits;
  if (kanjiHits != null && readingHits != null) {
    hits = kanjiHits.intersection(readingHits);
  } else {
    hits = kanjiHits ?? readingHits ?? <Entry>{};
  }

  // Skip targets that do not have the referenced sense.
  final senseNum = xref.senseNum;
  return hits.where((e) => senseNum == null || senseNum <= e.senses.length);
}

/// Writes [entries] to [db] in two batches.
///
/// Batch 1 stores entries, their kanji/reading elements and the kana and
/// English lookup tables. Batch 2 stores senses with all their attributes,
/// including resolved cross-references and antonyms.
Future<void> insertIntoDB(List<Entry> entries, Database db) async {
  print(' [JMdict] Batch 1');
  Batch b = db.batch();
  for (final e in entries) {
    b.insert(TableNames.entry, e.sqlValue);

    for (final k in e.kanji) {
      b.insert(TableNames.kanjiElement, {...k.sqlValue, 'entryId': e.id});
      // b.insert(
      //   TableNames.entryByKana,
      //   {'entryId': e.id, 'kana': transliterateKatakanaToHiragana(k.reading)},
      //   // Some entries have the same reading twice with difference in katakana and hiragana
      //   conflictAlgorithm: ConflictAlgorithm.ignore,
      // );
      for (final i in k.info) {
        b.insert(
          TableNames.kanjiInfo,
          {'entryId': e.id, 'reading': k.reading, 'info': i},
        );
      }
    }

    for (final r in e.readings) {
      b.insert(TableNames.readingElement, {...r.sqlValue, 'entryId': e.id});
      b.insert(
        TableNames.entryByKana,
        {'entryId': e.id, 'kana': transliterateKanaToLatin(r.reading)},
        // Some entries have the same reading twice with difference in
        // katakana and hiragana.
        conflictAlgorithm: ConflictAlgorithm.ignore,
      );
      for (final i in r.info) {
        b.insert(
          TableNames.readingInfo,
          {'entryId': e.id, 'reading': r.reading, 'info': i},
        );
      }
      for (final res in r.restrictions) {
        b.insert(
          TableNames.readingRestriction,
          {'entryId': e.id, 'reading': r.reading, 'restriction': res},
        );
      }
    }

    for (final s in e.senses) {
      for (final g in s.glossary) {
        if (g.language == 'eng') {
          b.insert(
            TableNames.entryByEnglish,
            {'entryId': e.id, 'english': g.phrase},
            // The same English phrase can appear in several senses of one
            // entry.
            conflictAlgorithm: ConflictAlgorithm.ignore,
          );
        }
      }
    }
  }
  await b.commit();

  print(' [JMdict] Building trees');
  final entriesByKanji = _indexEntries(entries, (e) => e.kanji);
  final entriesByReading = _indexEntries(entries, (e) => e.readings);

  print(' [JMdict] Batch 2');
  b = db.batch();
  for (final e in entries) {
    for (final s in e.senses) {
      b.insert(
        TableNames.sense,
        {...s.sqlValue, 'id': s.id, 'entryId': e.id},
      );
      for (final d in s.dialects) {
        b.insert(TableNames.senseDialect, {'senseId': s.id, 'dialect': d});
      }
      for (final f in s.fields) {
        b.insert(TableNames.senseField, {'senseId': s.id, 'field': f});
      }
      for (final i in s.info) {
        b.insert(TableNames.senseInfo, {'senseId': s.id, 'info': i});
      }
      for (final m in s.misc) {
        b.insert(TableNames.senseMisc, {'senseId': s.id, 'misc': m});
      }
      for (final p in s.pos) {
        b.insert(TableNames.sensePOS, {'senseId': s.id, 'pos': p});
      }
      // Inserted exactly once; a second identical loop used to write every
      // language source twice.
      for (final l in s.languageSource) {
        b.insert(
          TableNames.senseLanguageSource,
          {...l.sqlValue, 'senseId': s.id},
        );
      }
      for (final rk in s.restrictedToKanji) {
        b.insert(
          TableNames.senseRestrictedToKanji,
          {'entryId': e.id, 'senseId': s.id, 'kanji': rk},
        );
      }
      for (final rr in s.restrictedToReading) {
        b.insert(
          TableNames.senseRestrictedToReading,
          {'entryId': e.id, 'senseId': s.id, 'reading': rr},
        );
      }
      for (final g in s.glossary) {
        if (g.language == 'eng') {
          b.insert(
            TableNames.senseGlossary,
            {...g.sqlValue, 'senseId': s.id},
            // There are some duplicate glossary, especially in
            // the other languages.
            conflictAlgorithm: ConflictAlgorithm.ignore,
          );
        }
      }
      for (final xref in s.seeAlso) {
        final targets = _resolveXRef(xref, entriesByKanji, entriesByReading);
        for (final target in targets) {
          b.insert(
            TableNames.senseSeeAlso,
            {
              'senseId': s.id,
              'xrefEntryId': target.id,
              'seeAlsoKanji': xref.kanjiRef,
              'seeAlsoReading': xref.readingRef,
              'seeAlsoSense': xref.senseNum,
            },
          );
        }
      }
      for (final ant in s.antonyms) {
        final targets = _resolveXRef(ant, entriesByKanji, entriesByReading);
        for (final target in targets) {
          b.insert(TableNames.senseAntonyms, {
            'senseId': s.id,
            'xrefEntryId': target.id,
            'antonymKanji': ant.kanjiRef,
            'antonymReading': ant.readingRef,
            'antonymSense': ant.senseNum,
          });
        }
      }
    }
  }
  await b.commit();
}
/// Loads `data/JMdict.xml`, converts it to entries and stores them in [db],
/// printing a progress line before each stage.
Future<void> addDataFromJMdict(Database db) async {
  print('[JMdict] Reading file...');
  final xmlText = File('data/JMdict.xml').readAsStringSync();

  print('[JMdict] Parsing XML...');
  final document = XmlDocument.parse(xmlText);
  final jmdictRoot = document.getElement('JMdict')!;

  print('[JMdict] Transforming data...');
  final parsedEntries = transformXML(jmdictRoot);

  print('[JMdict] Writing to database...');
  await insertIntoDB(parsedEntries, db);
}

284
bin/kanjidic/objects.dart Normal file
View File

@ -0,0 +1,284 @@
import '../objects.dart';
/// Names of the database tables the KANJIDIC importer writes to.
class TableNames {
  // Character-level tables.
  static const character = 'KANJIDIC_Character';
  static const radicalName = 'KANJIDIC_RadicalName';
  static const codepoint = 'KANJIDIC_Codepoint';
  static const radical = 'KANJIDIC_Radical';
  static const strokeMiscount = 'KANJIDIC_StrokeMiscount';
  static const variant = 'KANJIDIC_Variant';

  // Dictionary references and query codes.
  static const dictionaryReference = '_KANJIDIC_DictionaryReference_Part1';
  static const dictionaryReferenceMoro = '_KANJIDIC_DictionaryReference_Moro';
  static const queryCode = 'KANJIDIC_QueryCode';

  // Readings and meanings.
  static const reading = 'KANJIDIC_Reading';
  static const kunyomi = 'KANJIDIC_Kunyomi';
  static const onyomi = 'KANJIDIC_Onyomi';
  static const meaning = 'KANJIDIC_Meaning';
  static const nanori = 'KANJIDIC_Nanori';
}
/// A code point of a kanji in one encoding standard.
class CodePoint extends SQLWritable {
  /// The kanji this code point belongs to.
  final String kanji;

  /// The encoding standard, taken from the `cp_type` attribute.
  final String type;

  /// The code point value as written in the source file.
  final String codepoint;

  const CodePoint({
    required this.kanji,
    required this.type,
    required this.codepoint,
  });

  /// Column values for this row.
  @override
  Map<String, Object?> get sqlValue {
    return {
      'kanji': kanji,
      'type': type,
      'codepoint': codepoint,
    };
  }
}
/// The radical of a kanji under one classification scheme.
class Radical extends SQLWritable {
  /// The kanji this radical belongs to.
  final String kanji;

  /// The classification scheme, taken from the `rad_type` attribute.
  final String type;

  /// The radical value as written in the source file.
  final String radical;

  const Radical({
    required this.kanji,
    required this.type,
    required this.radical,
  });

  /// Column values for this row.
  @override
  Map<String, Object?> get sqlValue {
    return {
      'kanji': kanji,
      'type': type,
      'radical': radical,
    };
  }
}
/// An alternative stroke count recorded for a kanji.
class StrokeMiscount extends SQLWritable {
  /// The kanji the count applies to.
  final String kanji;

  /// The alternative stroke count.
  final int strokeCount;

  const StrokeMiscount({
    required this.kanji,
    required this.strokeCount,
  });

  /// Column values for this row.
  @override
  Map<String, Object?> get sqlValue {
    return {
      'kanji': kanji,
      'strokeCount': strokeCount,
    };
  }
}
/// A variant form of a kanji, identified within some coding scheme.
class Variant extends SQLWritable {
  /// The kanji that has this variant.
  final String kanji;

  /// The coding scheme, taken from the `var_type` attribute.
  final String type;

  /// The variant's identifier within that scheme.
  final String variant;

  const Variant({
    required this.kanji,
    required this.type,
    required this.variant,
  });

  /// Column values for this row.
  @override
  Map<String, Object?> get sqlValue {
    return {
      'kanji': kanji,
      'type': type,
      'variant': variant,
    };
  }
}
/// An index reference for a kanji in a published dictionary.
class DictionaryReference extends SQLWritable {
  /// The kanji being referenced.
  final String kanji;

  /// The dictionary, taken from the `dr_type` attribute.
  final String type;

  /// The reference text as written in the source file.
  final String ref;

  const DictionaryReference({
    required this.kanji,
    required this.type,
    required this.ref,
  });

  /// Column values for this row.
  @override
  Map<String, Object?> get sqlValue {
    return {
      'kanji': kanji,
      'type': type,
      'ref': ref,
    };
  }
}
/// A dictionary reference of type `moro`, which additionally carries a volume
/// and page number.
class DictionaryReferenceMoro extends SQLWritable {
  /// The kanji being referenced.
  final String kanji;

  /// The reference text as written in the source file.
  final String ref;

  /// Volume number, or null when not given.
  final int? volume;

  /// Page number, or null when not given.
  final int? page;

  const DictionaryReferenceMoro({
    required this.kanji,
    required this.ref,
    required this.volume,
    required this.page,
  });

  /// Column values for this row.
  @override
  Map<String, Object?> get sqlValue {
    return {
      'kanji': kanji,
      'ref': ref,
      'volume': volume,
      'page': page,
    };
  }
}
/// A lookup code for a kanji in one query-code system.
class QueryCode extends SQLWritable {
  /// The kanji the code identifies.
  final String kanji;

  /// The code itself.
  final String code;

  /// The query-code system, taken from the `qc_type` attribute.
  final String type;

  /// Value of the `skip_misclass` attribute, or null when absent.
  final String? skipMisclassification;

  const QueryCode({
    required this.kanji,
    required this.code,
    required this.type,
    required this.skipMisclassification,
  });

  /// Column values for this row.
  @override
  Map<String, Object?> get sqlValue {
    return {
      'kanji': kanji,
      'code': code,
      'type': type,
      'skipMisclassification': skipMisclassification,
    };
  }
}
/// A reading of a kanji other than its Japanese on/kun readings.
class Reading extends SQLWritable {
  /// The kanji being read.
  final String kanji;

  /// The reading type, taken from the `r_type` attribute.
  final String type;

  /// The reading text.
  final String reading;

  const Reading({
    required this.kanji,
    required this.type,
    required this.reading,
  });

  /// Column values for this row.
  @override
  Map<String, Object?> get sqlValue {
    return {
      'kanji': kanji,
      'type': type,
      'reading': reading,
    };
  }
}
/// A kun reading of a kanji.
class Kunyomi extends SQLWritable {
  /// The kanji being read.
  final String kanji;

  /// The reading text.
  final String yomi;

  /// Whether the reading is flagged `jy` in the `r_status` attribute.
  final bool isJouyou;

  const Kunyomi({
    required this.kanji,
    required this.yomi,
    required this.isJouyou,
  });

  /// Column values for this row.
  ///
  /// [isJouyou] is written as 0/1 because sqflite only accepts `num`,
  /// `String` and `Uint8List` values; `bool` is not a supported SQLite type.
  @override
  Map<String, Object?> get sqlValue => {
        'kanji': kanji,
        'yomi': yomi,
        'isJouyou': isJouyou ? 1 : 0,
      };
}
/// An on reading of a kanji.
class Onyomi extends SQLWritable {
  /// The kanji being read.
  final String kanji;

  /// The reading text.
  final String yomi;

  /// Whether the reading is flagged `jy` in the `r_status` attribute.
  final bool isJouyou;

  /// Value of the `on_type` attribute, or null when absent.
  final String? type;

  const Onyomi({
    required this.kanji,
    required this.yomi,
    required this.isJouyou,
    required this.type,
  });

  /// Column values for this row.
  ///
  /// [isJouyou] is written as 0/1 because sqflite only accepts `num`,
  /// `String` and `Uint8List` values; `bool` is not a supported SQLite type.
  @override
  Map<String, Object?> get sqlValue => {
        'kanji': kanji,
        'yomi': yomi,
        'isJouyou': isJouyou ? 1 : 0,
        'type': type,
      };
}
/// A meaning of a kanji in a given language.
class Meaning extends SQLWritable {
  /// The kanji the meaning belongs to.
  final String kanji;

  /// Language code of [meaning]; defaults to `'eng'`.
  final String language;

  /// The meaning text.
  final String meaning;

  /// The `'eng'` default belongs to [language]. It used to be attached to
  /// [meaning], which would have stored the literal text "eng" as a meaning
  /// whenever the argument was omitted.
  const Meaning({
    required this.kanji,
    this.language = 'eng',
    required this.meaning,
  });

  /// Column values for this row.
  @override
  Map<String, Object?> get sqlValue => {
        'kanji': kanji,
        'language': language,
        'meaning': meaning,
      };
}
/// A KANJIDIC character with everything recorded about it.
///
/// Only the scalar fields are part of [sqlValue]; the list fields are written
/// to their own tables by the importer.
class Character extends SQLWritable {
  /// The kanji itself.
  final String literal;

  /// The primary stroke count.
  final int strokeCount;

  // Optional classification values; null when the source has none.
  final int? grade;
  final int? frequency;
  final int? jlpt;

  final List<String> radicalName;
  final List<CodePoint> codepoints;
  final List<Radical> radicals;

  /// Alternative stroke counts, excluding the primary [strokeCount].
  final List<int> strokeMiscounts;
  final List<Variant> variants;
  final List<DictionaryReference> dictionaryReferences;
  final List<DictionaryReferenceMoro> dictionaryReferencesMoro;
  final List<QueryCode> querycodes;
  final List<Reading> readings;
  final List<Onyomi> onyomi;
  final List<Kunyomi> kunyomi;
  final List<Meaning> meanings;
  final List<String> nanori;

  const Character({
    required this.literal,
    required this.strokeCount,
    this.grade,
    this.frequency,
    this.jlpt,
    this.radicalName = const [],
    this.codepoints = const [],
    this.radicals = const [],
    this.strokeMiscounts = const [],
    this.variants = const [],
    this.dictionaryReferences = const [],
    this.dictionaryReferencesMoro = const [],
    this.querycodes = const [],
    this.readings = const [],
    this.onyomi = const [],
    this.kunyomi = const [],
    this.meanings = const [],
    this.nanori = const [],
  });

  /// Column values for the character row itself.
  @override
  Map<String, Object?> get sqlValue => {
        'literal': literal,
        'grade': grade,
        'strokeCount': strokeCount,
        'frequency': frequency,
        'jlpt': jlpt,
      };
}

231
bin/kanjidic/parser.dart Normal file
View File

@ -0,0 +1,231 @@
import 'dart:io';
import 'package:sqflite_common/sqlite_api.dart';
import 'package:xml/xml.dart';
import 'package:collection/collection.dart';
import 'objects.dart';
/// Converts every `<character>` child of the KANJIDIC2 [root] element into a
/// [Character].
///
/// Throws if a character lacks `literal` or `stroke_count`, or if a required
/// type attribute (`cp_type`, `rad_type`, `var_type`, `dr_type`, `qc_type`,
/// `r_type`) is missing.
List<Character> transformXML(XmlElement root) {
  final List<Character> result = [];
  for (final c in root.findElements('character')) {
    final kanji = c.findElements('literal').first.innerText;

    // Per the KANJIDIC2 DTD, grade, freq, jlpt and rad_name are nested inside
    // <misc>, so they must be searched for among all descendants
    // (`findElements` only inspects direct children and never finds them).
    int? optionalInt(String tag) =>
        int.tryParse(c.findAllElements(tag).firstOrNull?.innerText ?? '');

    final readingEls = c.findAllElements('reading').toList();

    result.add(
      Character(
        literal: kanji,
        strokeCount:
            int.parse(c.findAllElements('stroke_count').first.innerText),
        grade: optionalInt('grade'),
        frequency: optionalInt('freq'),
        // Previously read from `rad_name` by mistake.
        jlpt: optionalInt('jlpt'),
        radicalName:
            c.findAllElements('rad_name').map((e) => e.innerText).toList(),
        codepoints: c
            .findAllElements('cp_value')
            .map(
              (e) => CodePoint(
                kanji: kanji,
                type: e.getAttribute('cp_type')!,
                codepoint: e.innerText,
              ),
            )
            .toList(),
        radicals: c
            .findAllElements('rad_value')
            .map(
              (e) => Radical(
                kanji: kanji,
                type: e.getAttribute('rad_type')!,
                radical: e.innerText,
              ),
            )
            .toList(),
        // The first stroke_count is the primary one; the rest are miscounts.
        strokeMiscounts: c
            .findAllElements('stroke_count')
            .skip(1)
            .map((e) => int.parse(e.innerText))
            .toList(),
        variants: c
            .findAllElements('variant')
            .map(
              (e) => Variant(
                kanji: kanji,
                type: e.getAttribute('var_type')!,
                variant: e.innerText,
              ),
            )
            .toList(),
        dictionaryReferences: c
            .findAllElements('dic_ref')
            .where((e) => e.getAttribute('dr_type') != 'moro')
            .map(
              (e) => DictionaryReference(
                kanji: kanji,
                type: e.getAttribute('dr_type')!,
                ref: e.innerText,
              ),
            )
            .toList(),
        dictionaryReferencesMoro: c
            .findAllElements('dic_ref')
            .where((e) => e.getAttribute('dr_type') == 'moro')
            .map(
              (e) => DictionaryReferenceMoro(
                kanji: kanji,
                ref: e.innerText,
                page: int.tryParse(e.getAttribute('m_page') ?? ''),
                volume: int.tryParse(e.getAttribute('m_vol') ?? ''),
              ),
            )
            .toList(),
        querycodes: c
            .findAllElements('q_code')
            .map(
              (e) => QueryCode(
                kanji: kanji,
                code: e.innerText,
                type: e.getAttribute('qc_type')!,
                skipMisclassification: e.getAttribute('skip_misclass'),
              ),
            )
            .toList(),
        // Japanese on/kun readings get their own lists below.
        readings: readingEls
            .where(
              (e) => !['ja_on', 'ja_kun'].contains(e.getAttribute('r_type')),
            )
            .map(
              (e) => Reading(
                kanji: kanji,
                type: e.getAttribute('r_type')!,
                reading: e.innerText,
              ),
            )
            .toList(),
        kunyomi: readingEls
            .where((e) => e.getAttribute('r_type') == 'ja_kun')
            .map(
              (e) => Kunyomi(
                kanji: kanji,
                yomi: e.innerText,
                isJouyou: e.getAttribute('r_status') == 'jy',
              ),
            )
            .toList(),
        onyomi: readingEls
            .where((e) => e.getAttribute('r_type') == 'ja_on')
            .map(
              (e) => Onyomi(
                kanji: kanji,
                yomi: e.innerText,
                isJouyou: e.getAttribute('r_status') == 'jy',
                type: e.getAttribute('on_type'),
              ),
            )
            .toList(),
        meanings: c
            .findAllElements('meaning')
            .map(
              (e) => Meaning(
                kanji: kanji,
                language: e.getAttribute('m_lang') ?? 'eng',
                meaning: e.innerText,
              ),
            )
            .toList(),
        nanori: c.findAllElements('nanori').map((e) => e.innerText).toList(),
      ),
    );
  }
  return result;
}
/// Writes [characters] and all of their related rows to [db] in one batch.
///
/// Completes only after the batch has been committed, so commit errors reach
/// the caller.
Future<void> insertIntoDB(List<Character> characters, Database db) async {
  final b = db.batch();
  for (final c in characters) {
    // if (c.dictionaryReferences.any((e) =>
    //     c.dictionaryReferences
    //         .where((e2) => e.kanji == e2.kanji && e.type == e2.type)
    //         .length >
    //     1)) {
    //   print(c.dictionaryReferences.map((e) => e.sqlValue).toList());
    // }
    b.insert(TableNames.character, c.sqlValue);
    for (final n in c.radicalName) {
      b.insert(TableNames.radicalName, {'kanji': c.literal, 'name': n});
    }
    for (final cp in c.codepoints) {
      b.insert(TableNames.codepoint, cp.sqlValue);
    }
    for (final r in c.radicals) {
      b.insert(TableNames.radical, r.sqlValue);
    }
    for (final sm in c.strokeMiscounts) {
      b.insert(
        TableNames.strokeMiscount,
        {
          'kanji': c.literal,
          'strokeCount': sm,
        },
      );
    }
    for (final v in c.variants) {
      b.insert(TableNames.variant, v.sqlValue);
    }
    for (final dr in c.dictionaryReferences) {
      // There are duplicate entries here
      b.insert(
        TableNames.dictionaryReference,
        dr.sqlValue,
        conflictAlgorithm: ConflictAlgorithm.ignore,
      );
    }
    for (final drm in c.dictionaryReferencesMoro) {
      b.insert(TableNames.dictionaryReferenceMoro, drm.sqlValue);
    }
    for (final q in c.querycodes) {
      b.insert(TableNames.queryCode, q.sqlValue);
    }
    for (final r in c.readings) {
      b.insert(TableNames.reading, r.sqlValue);
    }
    for (final k in c.kunyomi) {
      b.insert(TableNames.kunyomi, k.sqlValue);
    }
    for (final o in c.onyomi) {
      b.insert(TableNames.onyomi, o.sqlValue);
    }
    for (final m in c.meanings) {
      b.insert(TableNames.meaning, m.sqlValue);
    }
    for (final n in c.nanori) {
      b.insert(
        TableNames.nanori,
        {
          'kanji': c.literal,
          'nanori': n,
        },
      );
    }
  }
  // Without `await` the returned future completed before anything was
  // written, and any commit error was silently dropped.
  await b.commit();
}
/// Loads `data/kanjidic2.xml`, converts it to characters and stores them in
/// [db], printing a progress line before each stage.
Future<void> addDataFromKANJIDIC(Database db) async {
  print('[KANJIDIC2] Reading file...');
  final xmlText = File('data/kanjidic2.xml').readAsStringSync();

  print('[KANJIDIC2] Parsing XML...');
  final document = XmlDocument.parse(xmlText);
  final kanjidicRoot = document.getElement('kanjidic2')!;

  print('[KANJIDIC2] Transforming data...');
  final characters = transformXML(kanjidicRoot);

  print('[KANJIDIC2] Writing to database...');
  await insertIntoDB(characters, db);
}

5
bin/objects.dart Normal file
View File

@ -0,0 +1,5 @@
/// Base class for objects that can describe themselves as a database row.
abstract class SQLWritable {
  /// Const so that subclasses may declare const constructors.
  const SQLWritable();

  /// The row as a map from column name to value.
  Map<String, Object?> get sqlValue;
}

13
bin/radkfile/objects.dart Normal file
View File

@ -0,0 +1,13 @@
/// A radical paired with a kanji.
// NOTE(review): nothing in the visible importer constructs this class —
// confirm whether it is still needed.
class Radical {
  /// The radical.
  final String radical;

  /// The kanji.
  final String kanji;

  // TODO: the purpose of this field has not been decided yet.
  final String something;

  const Radical({
    required this.radical,
    required this.kanji,
    required this.something,
  });
}

32
bin/radkfile/parser.dart Normal file
View File

@ -0,0 +1,32 @@
import 'dart:io';
import 'package:sqflite_common/sqlite_api.dart';
/// Reads `data/radkfile_utf8` and inserts one `RADKFILE` row per
/// (radical, kanji) pair into [db].
///
/// Completes only after the batch has been committed, so commit errors reach
/// the caller.
Future<void> addDataFromRADKFILE(Database db) async {
  final String content = File('data/radkfile_utf8').readAsStringSync();
  // NOTE(review): without `multiLine: true`, `^` and `$` anchor to the whole
  // input, so this replaceAll leaves the file unchanged unless it is a single
  // comment line. `skip(2)` presumably compensates for a `$` inside the
  // header comments — verify against the data file before changing either.
  final Iterable<String> blocks =
      content.replaceAll(RegExp(r'^#.*$'), '').split(r'$').skip(2);

  print('[RADKFILE] Writing to database...');
  final b = db.batch();
  for (final block in blocks) {
    // Index 1 is taken as the radical; presumably index 0 is the space that
    // follows `$` — TODO confirm.
    final String radical = block[1];
    // Drop the header line of the block, then split the rest into characters.
    final List<String> kanjiList = block
        .replaceFirst(RegExp(r'.*\n'), '')
        .split('')
      ..removeWhere((e) => e == '' || e == '\n');
    for (final kanji in kanjiList.toSet()) {
      b.insert(
        'RADKFILE',
        {
          'radical': radical,
          'kanji': kanji,
        },
      );
    }
  }
  // Without `await` the function returned before the rows were written and
  // any commit error was silently dropped.
  await b.commit();
}

View File

@ -0,0 +1,622 @@
// Source: https://github.com/Kimtaro/ve/blob/master/lib/providers/japanese_transliterators.rb
// The kana literals were empty strings, which can never equal a non-empty
// substring and so disabled both special cases in the transliterator.

/// The syllabic n.
const hiragana_syllabic_n = 'ん';

/// The small tsu, which doubles the following consonant.
const hiragana_small_tsu = 'っ';
/// Romanisation of each hiragana mora (one or two characters).
///
/// The single-character keys had been reduced to empty strings, which left
/// dozens of duplicate `''` keys in a const map; they are restored here from
/// their romaji values.
const Map<String, String> hiragana_to_latin = {
  'あ': 'a',
  'い': 'i',
  'う': 'u',
  'え': 'e',
  'お': 'o',
  'か': 'ka',
  'き': 'ki',
  'く': 'ku',
  'け': 'ke',
  'こ': 'ko',
  'が': 'ga',
  'ぎ': 'gi',
  'ぐ': 'gu',
  'げ': 'ge',
  'ご': 'go',
  'さ': 'sa',
  'し': 'shi',
  'す': 'su',
  'せ': 'se',
  'そ': 'so',
  'ざ': 'za',
  'じ': 'ji',
  'ず': 'zu',
  'ぜ': 'ze',
  'ぞ': 'zo',
  'た': 'ta',
  'ち': 'chi',
  'つ': 'tsu',
  'て': 'te',
  'と': 'to',
  'だ': 'da',
  'ぢ': 'ji',
  'づ': 'zu',
  'で': 'de',
  'ど': 'do',
  'な': 'na',
  'に': 'ni',
  'ぬ': 'nu',
  'ね': 'ne',
  'の': 'no',
  'は': 'ha',
  'ひ': 'hi',
  'ふ': 'fu',
  'へ': 'he',
  'ほ': 'ho',
  'ば': 'ba',
  'び': 'bi',
  'ぶ': 'bu',
  'べ': 'be',
  'ぼ': 'bo',
  'ぱ': 'pa',
  'ぴ': 'pi',
  'ぷ': 'pu',
  'ぺ': 'pe',
  'ぽ': 'po',
  'ま': 'ma',
  'み': 'mi',
  'む': 'mu',
  'め': 'me',
  'も': 'mo',
  'や': 'ya',
  'ゆ': 'yu',
  'よ': 'yo',
  'ら': 'ra',
  'り': 'ri',
  'る': 'ru',
  'れ': 're',
  'ろ': 'ro',
  'わ': 'wa',
  'うぃ': 'whi',
  'うぇ': 'whe',
  'を': 'wo',
  'ゑ': 'we',
  'ゐ': 'wi',
  'ー': '-',
  'ん': 'n',
  'きゃ': 'kya',
  'きゅ': 'kyu',
  'きょ': 'kyo',
  'きぇ': 'kye',
  'きぃ': 'kyi',
  'ぎゃ': 'gya',
  'ぎゅ': 'gyu',
  'ぎょ': 'gyo',
  'ぎぇ': 'gye',
  'ぎぃ': 'gyi',
  'くぁ': 'kwa',
  'くぃ': 'kwi',
  'くぅ': 'kwu',
  'くぇ': 'kwe',
  'くぉ': 'kwo',
  // NOTE(review): 'qwa' breaks the gwi/gwu/gwe/gwo pattern of its siblings —
  // confirm whether 'gwa' was intended.
  'ぐぁ': 'qwa',
  'ぐぃ': 'gwi',
  'ぐぅ': 'gwu',
  'ぐぇ': 'gwe',
  'ぐぉ': 'gwo',
  'しゃ': 'sha',
  'しぃ': 'syi',
  'しゅ': 'shu',
  'しぇ': 'she',
  'しょ': 'sho',
  'じゃ': 'ja',
  'じゅ': 'ju',
  'じぇ': 'jye',
  'じょ': 'jo',
  'じぃ': 'jyi',
  'すぁ': 'swa',
  'すぃ': 'swi',
  'すぅ': 'swu',
  'すぇ': 'swe',
  'すぉ': 'swo',
  'ちゃ': 'cha',
  'ちゅ': 'chu',
  'ちぇ': 'tye',
  'ちょ': 'cho',
  'ちぃ': 'tyi',
  'ぢゃ': 'ja',
  'ぢぃ': 'dyi',
  'ぢゅ': 'ju',
  'ぢぇ': 'dye',
  'ぢょ': 'jo',
  'つぁ': 'tsa',
  'つぃ': 'tsi',
  'つぇ': 'tse',
  'つぉ': 'tso',
  'てゃ': 'tha',
  'てぃ': 'thi',
  'てゅ': 'thu',
  'てぇ': 'the',
  'てょ': 'tho',
  'とぁ': 'twa',
  'とぃ': 'twi',
  'とぅ': 'twu',
  'とぇ': 'twe',
  'とぉ': 'two',
  'でゃ': 'dha',
  'でぃ': 'dhi',
  'でゅ': 'dhu',
  'でぇ': 'dhe',
  'でょ': 'dho',
  'どぁ': 'dwa',
  'どぃ': 'dwi',
  'どぅ': 'dwu',
  'どぇ': 'dwe',
  'どぉ': 'dwo',
  'にゃ': 'nya',
  'にゅ': 'nyu',
  'にょ': 'nyo',
  'にぇ': 'nye',
  'にぃ': 'nyi',
  'ひゃ': 'hya',
  'ひぃ': 'hyi',
  'ひゅ': 'hyu',
  'ひぇ': 'hye',
  'ひょ': 'hyo',
  'びゃ': 'bya',
  'びぃ': 'byi',
  'びゅ': 'byu',
  'びぇ': 'bye',
  'びょ': 'byo',
  'ぴゃ': 'pya',
  'ぴぃ': 'pyi',
  'ぴゅ': 'pyu',
  'ぴぇ': 'pye',
  'ぴょ': 'pyo',
  'ふぁ': 'fwa',
  'ふぃ': 'fyi',
  'ふぇ': 'fye',
  'ふぉ': 'fwo',
  'ふぅ': 'fwu',
  'ふゃ': 'fya',
  'ふゅ': 'fyu',
  'ふょ': 'fyo',
  'みゃ': 'mya',
  'みぃ': 'myi',
  'みゅ': 'myu',
  'みぇ': 'mye',
  'みょ': 'myo',
  'りゃ': 'rya',
  'りぃ': 'ryi',
  'りゅ': 'ryu',
  'りぇ': 'rye',
  'りょ': 'ryo',
  'ゔぁ': 'va',
  'ゔぃ': 'vyi',
  'ゔ': 'vu',
  'ゔぇ': 'vye',
  'ゔぉ': 'vo',
  'ゔゃ': 'vya',
  'ゔゅ': 'vyu',
  'ゔょ': 'vyo',
  'うぁ': 'wha',
  'いぇ': 'ye',
  'うぉ': 'who',
  'ぁ': 'xa',
  'ぃ': 'xi',
  'ぅ': 'xu',
  'ぇ': 'xe',
  'ぉ': 'xo',
  'ゕ': 'xka',
  'ゖ': 'xke',
  'ゎ': 'xwa'
};
/// Hiragana for each romaji syllable, including alternative spellings and a
/// few punctuation escapes.
///
/// The single-character values had been reduced to empty strings; they are
/// restored here from their romaji keys.
const Map<String, String> latin_to_hiragana = {
  'a': 'あ',
  'i': 'い',
  'u': 'う',
  'e': 'え',
  'o': 'お',
  'ka': 'か',
  'ki': 'き',
  'ku': 'く',
  'ke': 'け',
  'ko': 'こ',
  'ga': 'が',
  'gi': 'ぎ',
  'gu': 'ぐ',
  'ge': 'げ',
  'go': 'ご',
  'sa': 'さ',
  'si': 'し',
  'shi': 'し',
  'su': 'す',
  'se': 'せ',
  'so': 'そ',
  'za': 'ざ',
  'zi': 'じ',
  'ji': 'じ',
  'zu': 'ず',
  'ze': 'ぜ',
  'zo': 'ぞ',
  'ta': 'た',
  'ti': 'ち',
  'chi': 'ち',
  'tu': 'つ',
  'tsu': 'つ',
  'te': 'て',
  'to': 'と',
  'da': 'だ',
  'di': 'ぢ',
  'du': 'づ',
  'dzu': 'づ',
  'de': 'で',
  'do': 'ど',
  'na': 'な',
  'ni': 'に',
  'nu': 'ぬ',
  'ne': 'ね',
  'no': 'の',
  'ha': 'は',
  'hi': 'ひ',
  'hu': 'ふ',
  'fu': 'ふ',
  'he': 'へ',
  'ho': 'ほ',
  'ba': 'ば',
  'bi': 'び',
  'bu': 'ぶ',
  'be': 'べ',
  'bo': 'ぼ',
  'pa': 'ぱ',
  'pi': 'ぴ',
  'pu': 'ぷ',
  'pe': 'ぺ',
  'po': 'ぽ',
  'ma': 'ま',
  'mi': 'み',
  'mu': 'む',
  'me': 'め',
  'mo': 'も',
  'ya': 'や',
  'yu': 'ゆ',
  'yo': 'よ',
  'ra': 'ら',
  'ri': 'り',
  'ru': 'る',
  're': 'れ',
  'ro': 'ろ',
  'la': 'ら',
  'li': 'り',
  'lu': 'る',
  'le': 'れ',
  'lo': 'ろ',
  'wa': 'わ',
  'wi': 'うぃ',
  'we': 'うぇ',
  'wo': 'を',
  'wye': 'ゑ',
  'wyi': 'ゐ',
  '-': 'ー',
  'n': 'ん',
  'nn': 'ん',
  "n'": 'ん',
  'kya': 'きゃ',
  'kyu': 'きゅ',
  'kyo': 'きょ',
  'kye': 'きぇ',
  'kyi': 'きぃ',
  'gya': 'ぎゃ',
  'gyu': 'ぎゅ',
  'gyo': 'ぎょ',
  'gye': 'ぎぇ',
  'gyi': 'ぎぃ',
  'kwa': 'くぁ',
  'kwi': 'くぃ',
  'kwu': 'くぅ',
  'kwe': 'くぇ',
  'kwo': 'くぉ',
  'gwa': 'ぐぁ',
  'gwi': 'ぐぃ',
  'gwu': 'ぐぅ',
  'gwe': 'ぐぇ',
  'gwo': 'ぐぉ',
  'qwa': 'ぐぁ',
  'qwi': 'ぐぃ',
  'qwu': 'ぐぅ',
  'qwe': 'ぐぇ',
  'qwo': 'ぐぉ',
  'sya': 'しゃ',
  'syi': 'しぃ',
  'syu': 'しゅ',
  'sye': 'しぇ',
  'syo': 'しょ',
  'sha': 'しゃ',
  'shu': 'しゅ',
  'she': 'しぇ',
  'sho': 'しょ',
  'ja': 'じゃ',
  'ju': 'じゅ',
  'je': 'じぇ',
  'jo': 'じょ',
  'jya': 'じゃ',
  'jyi': 'じぃ',
  'jyu': 'じゅ',
  'jye': 'じぇ',
  'jyo': 'じょ',
  'zya': 'じゃ',
  'zyu': 'じゅ',
  'zyo': 'じょ',
  'zye': 'じぇ',
  'zyi': 'じぃ',
  'swa': 'すぁ',
  'swi': 'すぃ',
  'swu': 'すぅ',
  'swe': 'すぇ',
  'swo': 'すぉ',
  'cha': 'ちゃ',
  'chu': 'ちゅ',
  'che': 'ちぇ',
  'cho': 'ちょ',
  'cya': 'ちゃ',
  'cyi': 'ちぃ',
  'cyu': 'ちゅ',
  'cye': 'ちぇ',
  'cyo': 'ちょ',
  'tya': 'ちゃ',
  'tyi': 'ちぃ',
  'tyu': 'ちゅ',
  'tye': 'ちぇ',
  'tyo': 'ちょ',
  'dya': 'ぢゃ',
  'dyi': 'ぢぃ',
  'dyu': 'ぢゅ',
  'dye': 'ぢぇ',
  'dyo': 'ぢょ',
  'tsa': 'つぁ',
  'tsi': 'つぃ',
  'tse': 'つぇ',
  'tso': 'つぉ',
  'tha': 'てゃ',
  'thi': 'てぃ',
  'thu': 'てゅ',
  'the': 'てぇ',
  'tho': 'てょ',
  'twa': 'とぁ',
  'twi': 'とぃ',
  'twu': 'とぅ',
  'twe': 'とぇ',
  'two': 'とぉ',
  'dha': 'でゃ',
  'dhi': 'でぃ',
  'dhu': 'でゅ',
  'dhe': 'でぇ',
  'dho': 'でょ',
  'dwa': 'どぁ',
  'dwi': 'どぃ',
  'dwu': 'どぅ',
  'dwe': 'どぇ',
  'dwo': 'どぉ',
  'nya': 'にゃ',
  'nyu': 'にゅ',
  'nyo': 'にょ',
  'nye': 'にぇ',
  'nyi': 'にぃ',
  'hya': 'ひゃ',
  'hyi': 'ひぃ',
  'hyu': 'ひゅ',
  'hye': 'ひぇ',
  'hyo': 'ひょ',
  'bya': 'びゃ',
  'byi': 'びぃ',
  'byu': 'びゅ',
  'bye': 'びぇ',
  'byo': 'びょ',
  'pya': 'ぴゃ',
  'pyi': 'ぴぃ',
  'pyu': 'ぴゅ',
  'pye': 'ぴぇ',
  'pyo': 'ぴょ',
  'fa': 'ふぁ',
  'fi': 'ふぃ',
  'fe': 'ふぇ',
  'fo': 'ふぉ',
  'fwa': 'ふぁ',
  'fwi': 'ふぃ',
  'fwu': 'ふぅ',
  'fwe': 'ふぇ',
  'fwo': 'ふぉ',
  'fya': 'ふゃ',
  'fyi': 'ふぃ',
  'fyu': 'ふゅ',
  'fye': 'ふぇ',
  'fyo': 'ふょ',
  'mya': 'みゃ',
  'myi': 'みぃ',
  'myu': 'みゅ',
  'mye': 'みぇ',
  'myo': 'みょ',
  'rya': 'りゃ',
  'ryi': 'りぃ',
  'ryu': 'りゅ',
  'rye': 'りぇ',
  'ryo': 'りょ',
  'lya': 'りゃ',
  'lyu': 'りゅ',
  'lyo': 'りょ',
  'lye': 'りぇ',
  'lyi': 'りぃ',
  'va': 'ゔぁ',
  'vi': 'ゔぃ',
  'vu': 'ゔ',
  've': 'ゔぇ',
  'vo': 'ゔぉ',
  'vya': 'ゔゃ',
  'vyi': 'ゔぃ',
  'vyu': 'ゔゅ',
  'vye': 'ゔぇ',
  'vyo': 'ゔょ',
  'wha': 'うぁ',
  'whi': 'うぃ',
  'ye': 'いぇ',
  'whe': 'うぇ',
  'who': 'うぉ',
  'xa': 'ぁ',
  'xi': 'ぃ',
  'xu': 'ぅ',
  'xe': 'ぇ',
  'xo': 'ぉ',
  'xya': 'ゃ',
  'xyu': 'ゅ',
  'xyo': 'ょ',
  'xtu': 'っ',
  'xtsu': 'っ',
  'xka': 'ゕ',
  'xke': 'ゖ',
  'xwa': 'ゎ',
  // NOTE(review): kept as a plain space; confirm whether a full-width space
  // (U+3000) was intended.
  '@@': ' ',
  '#[': '「',
  '#]': '」',
  '#,': '、',
  '#.': '。',
  '#/': '・',
};
/// Whether [for_conversion] is exactly the small tsu marker
/// ([hiragana_small_tsu]).
bool _smallTsu(String for_conversion) {
  return hiragana_small_tsu == for_conversion;
}
/// Whether [for_conversion] is the syllabic n ([hiragana_syllabic_n]) and the
/// second character of [kana] is one of や, ゆ or よ.
///
/// Note that, despite the function name, the characters tested are
/// ya/yu/yo, not yu/ye/yo.
bool _nFollowedByYuYeYo(String for_conversion, String kana) {
  if (for_conversion != hiragana_syllabic_n) return false;
  if (kana.length <= 1) return false;
  return 'やゆよ'.contains(kana[1]);
}
/// Transliterates [hiragana] into Latin script (romaji).
///
/// The input is scanned left to right, preferring a two-character match in
/// [hiragana_to_latin] over a one-character match. Three cases are handled
/// outside the table:
///
/// * a small tsu ([hiragana_small_tsu]) emits nothing itself but doubles the
///   first letter of the next converted mora;
/// * a syllabic n directly before や, ゆ or よ becomes `n'`;
/// * any character without a table entry is copied through unchanged.
///
/// A small tsu that is never followed by a convertible mora is dropped.
String transliterateHiraganaToLatin(String hiragana) {
  // Scan by index and append to a buffer instead of repeatedly rebuilding
  // both the remaining input and the output string.
  final romaji = StringBuffer();
  var geminate = false;
  var index = 0;
  while (index < hiragana.length) {
    final remaining = hiragana.length - index;
    // At most two code units are ever inspected at once.
    final window = hiragana.substring(index, index + (remaining > 1 ? 2 : 1));
    for (final length in [if (remaining > 1) 2, 1]) {
      final chunk = window.substring(0, length);
      if (_smallTsu(chunk)) {
        geminate = true;
        index += length;
        break;
      }
      final String? mora = _nFollowedByYuYeYo(chunk, window)
          ? "n'"
          : hiragana_to_latin[chunk];
      if (mora != null) {
        if (geminate) {
          geminate = false;
          // Guard against an empty table entry: there is no letter to double.
          if (mora.isNotEmpty) romaji.write(mora[0]);
        }
        romaji.write(mora);
        index += length;
        break;
      }
      if (length == 1) {
        // No table entry even for a single character: pass it through.
        romaji.write(chunk);
        index += length;
      }
    }
  }
  return romaji.toString();
}
/// Whether [for_conversion] is exactly `nn` followed by one of the vowels
/// `a`, `i`, `u`, `e` or `o`.
bool _doubleNFollowedByAIUEO(String for_conversion) =>
    for_conversion.length == 3 &&
    for_conversion.startsWith('nn') &&
    'aiueo'.contains(for_conversion[2]);
/// Whether [latin_to_hiragana] yields a non-null value for [for_conversion].
bool _hasTableMatch(String for_conversion) {
  final kana = latin_to_hiragana[for_conversion];
  return kana != null;
}
/// Whether [for_conversion] starts a geminated consonant.
///
/// True for the literal `tch`, or, when [length] is 2, for two identical
/// letters drawn from `kgsztdnbpmyrlwchf`.
bool _hasDoubleConsonant(String for_conversion, int length) {
  if (for_conversion == 'tch') return true;
  if (length != 2 || for_conversion.length != 2) return false;
  final first = for_conversion[0];
  return first == for_conversion[1] && 'kgsztdnbpmyrlwchf'.contains(first);
}
/// Transliterates [latin] (romaji) into hiragana.
///
/// The input is lower-cased and `mb`/`mp` are rewritten to `nb`/`np` first.
/// It is then scanned left to right, trying chunks of three, two and one
/// characters in that order. For each chunk:
///
/// * `nn` + vowel emits a syllabic n and consumes a single `n`;
/// * a [latin_to_hiragana] entry emits its kana and consumes the chunk;
/// * a doubled consonant (or `tch`) emits a small tsu and consumes one
///   character;
/// * otherwise a single unmatched character is copied through unchanged.
String transliterateLatinToHiragana(String latin) {
  final romaji =
      latin.toLowerCase().replaceAll('mb', 'nb').replaceAll('mp', 'np');
  // Scan by index and append to a buffer instead of repeatedly rebuilding
  // both the remaining input and the output string.
  final kana = StringBuffer();
  var index = 0;
  while (index < romaji.length) {
    final remaining = romaji.length - index;
    final lengths = [
      if (remaining > 2) 3,
      if (remaining > 1) 2,
      1,
    ];
    for (final length in lengths) {
      final chunk = romaji.substring(index, index + length);
      String? mora;
      var consumed = length;
      if (_doubleNFollowedByAIUEO(chunk)) {
        mora = hiragana_syllabic_n;
        // Leave the second `n` and the vowel for the next iteration.
        consumed = 1;
      } else if (_hasTableMatch(chunk)) {
        mora = latin_to_hiragana[chunk];
      } else if (_hasDoubleConsonant(chunk, length)) {
        mora = hiragana_small_tsu;
        // Only the first consonant is replaced by the small tsu.
        consumed = 1;
      }
      if (mora != null) {
        kana.write(mora);
        index += consumed;
        break;
      }
      if (length == 1) {
        // Nothing matched even a single character: pass it through.
        kana.write(chunk);
        index += 1;
      }
    }
  }
  return kana.toString();
}
/// Returns [text] with every UTF-16 code unit that lies in the inclusive
/// range [rangeStart]..[rangeEnd] shifted by [distance].
///
/// Code units outside the range are kept as they are.
String _transposeCodepointsInRange(
  String text,
  int distance,
  int rangeStart,
  int rangeEnd,
) {
  final shifted = [
    for (final unit in text.codeUnits)
      if (unit >= rangeStart && unit <= rangeEnd) unit + distance else unit,
  ];
  return String.fromCharCodes(shifted);
}
/// Transliterates [kana] into Latin script by first folding katakana to
/// hiragana and then romanizing the result.
String transliterateKanaToLatin(String kana) {
  final hiragana = transliterateKatakanaToHiragana(kana);
  return transliterateHiraganaToLatin(hiragana);
}
/// Transliterates [latin] into katakana by first converting it to hiragana
/// and then shifting the result to katakana.
String transliterateLatinToKatakana(String latin) {
  final hiragana = transliterateLatinToHiragana(latin);
  return transliterateHiraganaToKatakana(hiragana);
}
/// Converts katakana in [katakana] to hiragana.
///
/// Code units U+30A1..U+30F6 are shifted down by 0x60; everything else is
/// left untouched.
String transliterateKatakanaToHiragana(String katakana) {
  return _transposeCodepointsInRange(katakana, -0x60, 0x30A1, 0x30F6);
}
/// Converts hiragana in [hiragana] to katakana.
///
/// Code units U+3041..U+3096 are shifted up by 0x60; everything else is
/// left untouched.
String transliterateHiraganaToKatakana(String hiragana) {
  return _transposeCodepointsInRange(hiragana, 0x60, 0x3041, 0x3096);
}
/// Converts full-width ASCII variants and the ideographic space to their
/// half-width (plain ASCII) counterparts.
///
/// Note that, despite its name, the [halfwidth] parameter carries the
/// full-width input text.
String transliterateFullwidthRomajiToHalfwidth(String halfwidth) {
  // U+FF01..U+FF5E sit exactly 0xFEE0 above ASCII U+0021..U+007E.
  final asciiFolded =
      _transposeCodepointsInRange(halfwidth, -0xFEE0, 0xFF01, 0xFF5E);
  // The ideographic space U+3000 maps onto the ASCII space U+0020.
  return _transposeCodepointsInRange(asciiFolded, -0x2FE0, 0x3000, 0x3000);
}
/// Converts printable ASCII and the ASCII space in [halfwidth] to their
/// full-width counterparts.
String transliterateHalfwidthRomajiToFullwidth(String halfwidth) {
  // ASCII U+0021..U+007E moves up by 0xFEE0 to U+FF01..U+FF5E.
  final widened = _transposeCodepointsInRange(halfwidth, 0xFEE0, 0x21, 0x7E);
  // The ASCII space U+0020 becomes the ideographic space U+3000.
  return _transposeCodepointsInRange(widened, 0x2FE0, 0x20, 0x20);
}

122
flake.lock Normal file
View File

@ -0,0 +1,122 @@
{
"nodes": {
"JMdictSrc": {
"flake": false,
"locked": {
"narHash": "sha256-TAkT98/lC1zBAJ/ublGi/gK965pwxoHJrnWRaKKBq7I=",
"type": "file",
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz"
},
"original": {
"type": "file",
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz"
}
},
"JMdictWithExamplesSrc": {
"flake": false,
"locked": {
"narHash": "sha256-GfClwLR4uoxPKxRbI5qgELurAdpegCbZO5lEORb3EvA=",
"type": "file",
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz"
},
"original": {
"type": "file",
"url": "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz"
}
},
"RADKFILESrc": {
"flake": false,
"locked": {
"narHash": "sha256-rO2z5GPt3g6osZOlpyWysmIbRV2Gw4AR4XvngVTHNpk=",
"type": "file",
"url": "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz"
},
"original": {
"type": "file",
"url": "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz"
}
},
"flake-utils": {
"locked": {
"lastModified": 1649676176,
"narHash": "sha256-OWKJratjt2RW151VUlJPRALb7OU2S5s+f0vLj4o1bHM=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "a4b154ebbdc88c8498a5c7b01589addc9e9cb678",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nix-dart": {
"inputs": {
"flake-utils": [
"flake-utils"
],
"nixpkgs": [
"nixpkgs"
],
"pub2nix": "pub2nix"
},
"locked": {
"lastModified": 1652213615,
"narHash": "sha256-+eehm2JlhoKgY+Ea4DTxDMei/x4Fgz7S+ZPqWpZysuI=",
"owner": "tadfisher",
"repo": "nix-dart",
"rev": "6f686ddf984306d944e9b5adf9f35f3a0a0a70b7",
"type": "github"
},
"original": {
"owner": "tadfisher",
"repo": "nix-dart",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1655456688,
"narHash": "sha256-j2trI5gv2fnHdfUQFBy957avCPxxzCqE8R+TOYHPSRE=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "d17a56d90ecbd1b8fc908d49598fb854ef188461",
"type": "github"
},
"original": {
"id": "nixpkgs",
"ref": "nixos-22.05",
"type": "indirect"
}
},
"pub2nix": {
"flake": false,
"locked": {
"lastModified": 1594192744,
"narHash": "sha256-pDvcXSG1Mh2BpwkqAcNDJzcupV3pIAAtZJLfkiHMAz4=",
"owner": "paulyoung",
"repo": "pub2nix",
"rev": "0c7ecca590fcd1616db8c6468f799ffef36c85e9",
"type": "github"
},
"original": {
"owner": "paulyoung",
"repo": "pub2nix",
"type": "github"
}
},
"root": {
"inputs": {
"JMdictSrc": "JMdictSrc",
"JMdictWithExamplesSrc": "JMdictWithExamplesSrc",
"RADKFILESrc": "RADKFILESrc",
"flake-utils": "flake-utils",
"nix-dart": "nix-dart",
"nixpkgs": "nixpkgs"
}
}
},
"root": "root",
"version": 7
}

208
flake.nix Normal file
View File

@ -0,0 +1,208 @@
{
description = "A SQLite database containing open source japanese language translation data";
inputs = {
  nixpkgs.url = "nixpkgs/nixos-22.05";

  # flake-utils declares no nixpkgs input of its own (its flake.lock node has
  # no "inputs"), so there is nothing for a `follows` override to attach to.
  flake-utils.url = "github:numtide/flake-utils";

  nix-dart = {
    url = "github:tadfisher/nix-dart";
    inputs = {
      nixpkgs.follows = "nixpkgs";
      flake-utils.follows = "flake-utils";
    };
  };

  # Raw upstream data files; fetched as plain files, not as flakes.
  JMdictSrc = {
    url = "http://ftp.edrdg.org/pub/Nihongo/JMdict.gz";
    flake = false;
  };

  JMdictWithExamplesSrc = {
    url = "http://ftp.edrdg.org/pub/Nihongo/JMdict_e_examp.gz";
    flake = false;
  };

  RADKFILESrc = {
    url = "http://ftp.usf.edu/pub/ftp.monash.edu.au/pub/nihongo/radkfile.gz";
    flake = false;
  };
};
outputs = {
self,
nixpkgs,
flake-utils,
nix-dart,
JMdictSrc,
JMdictWithExamplesSrc,
RADKFILESrc
}: let
system = "x86_64-linux";
pkgs = import nixpkgs {
inherit system;
overlays = [
# (final: prev: { dart = nix-dart.packages.${system}.dart; })
nix-dart.overlay
];
};
inherit (pkgs) lib;
in {
# Development shell: Dart, make and the SQLite/SQL tooling listed below,
# plus the pub2nix lock generator from nix-dart.
devShell.${system} = pkgs.mkShell {
buildInputs = with pkgs; [
nix-dart.packages.${system}.pub2nix-lock
dart
gnumake
sqlite
sqlite-web
sqlint
sqlfluff
];
};
defaultPackage.${system} = self.packages.${system}.database;
packages.${system} = let
inherit (pkgs.stdenv) mkDerivation;
dbName = "main.db";
# Shared `meta` attributes, merged into the JMdict and RADKFILE derivations
# below via `edrdgMetadata // { ... }`.
edrdgMetadata = {
license = {
shortName = "EDRDG";
fullName = "Electronic Dictionary Research and Development Group General Dictionary Licence";
url = "http://www.csse.monash.edu.au/~jwb/edrdg/licence.html";
};
maintainers = [ "h7x4 <h7x4@nani.wtf>" ];
platforms = lib.platforms.all;
};
in {
# Decompresses both JMdict downloads (with and without example sentences),
# reformats them in place with xmlformat, and installs the two XML files
# into the output directory.
JMdict = mkDerivation {
name = "JMdict";
srcs = [
JMdictSrc
JMdictWithExamplesSrc
];
# The sources are single gzip files, so the default unpack step is skipped
# and decompression happens in buildPhase instead.
dontUnpack = true;
nativeBuildInputs = with pkgs; [ xmlformat ];
buildPhase = ''
gzip -dkc ${JMdictSrc} > jmdict.xml
gzip -dkc ${JMdictWithExamplesSrc} > jmdict_with_examples.xml
xmlformat -i jmdict.xml
xmlformat -i jmdict_with_examples.xml
'';
installPhase = ''
mkdir $out
cp jmdict.xml $out
cp jmdict_with_examples.xml $out
'';
meta = edrdgMetadata // {
description = "A Japanese-Multilingual Dictionary providing lexical data for japanese words";
homepage = "https://www.edrdg.org/jmdict/j_jmdict.html";
};
};
# Decompresses the RADKFILE download and re-encodes it from EUC-JP to UTF-8.
# Note that iconv writes straight to $out, so the output of this derivation
# is a single file rather than a directory.
RADKFILE = mkDerivation {
name = "RADKFILE";
src = RADKFILESrc;
dontUnpack = true;
buildPhase = ''
gzip -dkc $src > radkfile
'';
installPhase = ''
iconv -f EUC-JP -t UTF-8 -o $out radkfile
'';
meta = edrdgMetadata // {
description = "A file providing searchable decompositions of kanji characters";
homepage = "https://www.edrdg.org/krad/kradinf.html";
};
};
# Builds the Dart program in this repository with nix-dart's
# buildDartPackage, using pubspec.yaml and pub2nix.lock for dependencies.
database_generator = (nix-dart.builders.${system}.buildDartPackage {
pname = "database_generator";
version = "1.0";
buildInputs = [ nix-dart.packages.${system}.dart-dev ];
# Exclude the local .dart_tool directory from the source.
src = builtins.filterSource (path: type: baseNameOf path != ".dart_tool") ./.;
specFile = ./pubspec.yaml;
lockFile = ./pub2nix.lock;
}).overrideAttrs(old: {
buildInputs = [nix-dart.packages.${system}.dart-dev];
# Rewrites every occurrence of the substring "pub" in the inherited
# buildPhase to "dart pub".
# NOTE(review): replaceStrings is a plain substring replacement, so any
# other text containing "pub" in the builder's buildPhase is rewritten too.
buildPhase = builtins.replaceStrings ["pub"] ["dart pub"] old.buildPhase;
});
# Creates the SQLite database by applying the two SQL migrations and installs
# it together with a copy of the initial schema.
# Only the migrations are run in this buildPhase; the Dart ingestion program
# is not invoked here, although the data files are linked into ./data.
database = mkDerivation {
name = "database";
# Exclude any existing main.db from the source.
src = builtins.filterSource (path: type: baseNameOf path != dbName) ./.;
nativeBuildInputs = with pkgs; [
sqlite
];
buildPhase = ''
mkdir -p data
ln -s ${self.packages.${system}.JMdict}/* data
ln -s ${self.packages.${system}.RADKFILE} data
sqlite3 ${dbName} < migrations/0001_initial.sql
sqlite3 ${dbName} < migrations/0002_insert_info_values.sql
'';
installPhase = ''
mkdir -p $out
cp migrations/0001_initial.sql $out/schema.sql
cp ${dbName} $out/${dbName}
'';
};
# Generates SchemaSpy HTML documentation for the database built by the
# `database` package (its output provides main.db and schema.sql).
docs = mkDerivation {
  name = "docs";
  src = self.packages.${system}.database;
  nativeBuildInputs = with pkgs; [
    schemaspy
    sqlite-jdbc
  ];
  buildPhase = let
    # Database-type definition telling SchemaSpy how to talk to SQLite.
    # The jar name is derived from the package version so that it keeps
    # resolving when nixpkgs bumps sqlite-jdbc.
    properties = pkgs.writeText "sqlite.properties" ''
      description=SQLite
      driver=org.sqlite.JDBC
      driverPath=${pkgs.sqlite-jdbc}/share/java/sqlite-jdbc-${pkgs.sqlite-jdbc.version}.jar
      connectionSpec=jdbc:sqlite:<db>
    '';
    # Command-line defaults, copied to ./schemaspy.properties below.
    args = pkgs.writeText "schemaspy.properties" ''
      schemaspy.cat="%"
      schemaspy.t=sqlite
      schemaspy.sso=true
      schemaspy.db=${dbName}
      schemaspy.o=docs
      schemaspy.s=schema.sql
    '';
  in ''
    cp ${args} ./schemaspy.properties
    ls
    schemaspy -t ${properties}
  '';
  installPhase = ''
    cp -r docs $out
  '';
};
};
};
}

443
migrations/0001_initial.sql Normal file
View File

@ -0,0 +1,443 @@
-- TODO: figure out ondelete functions...
------------
-- JMdict --
------------
-- Lookup tables mapping a short code ("id") to its human-readable
-- description. They are referenced by "JMdict_SenseDialect",
-- "JMdict_SenseField", "JMdict_KanjiElementInfo", "JMdict_SenseMisc",
-- "JMdict_SensePOS" and "JMdict_ReadingElementInfo" respectively.
CREATE TABLE "JMdict_InfoDialect" (
"id" VARCHAR(4) PRIMARY KEY NOT NULL,
"description" TEXT NOT NULL
) WITHOUT ROWID;
CREATE TABLE "JMdict_InfoField" (
"id" VARCHAR(7) PRIMARY KEY NOT NULL,
"description" TEXT NOT NULL
) WITHOUT ROWID;
CREATE TABLE "JMdict_InfoKanji" (
"id" VARCHAR(5) PRIMARY KEY NOT NULL,
"description" TEXT NOT NULL
) WITHOUT ROWID;
CREATE TABLE "JMdict_InfoMisc" (
"id" VARCHAR(12) PRIMARY KEY NOT NULL,
"description" TEXT NOT NULL
) WITHOUT ROWID;
CREATE TABLE "JMdict_InfoPOS" (
"id" VARCHAR(9) PRIMARY KEY NOT NULL,
"description" TEXT NOT NULL
) WITHOUT ROWID;
CREATE TABLE "JMdict_InfoReading" (
"id" VARCHAR(5) PRIMARY KEY NOT NULL,
"description" TEXT NOT NULL
) WITHOUT ROWID;
-- The XML specification says that an entry needs to have at least
-- one sense and one reading. I will just assume this is the case, and
-- not implement a check for it.
CREATE TABLE "JMdict_Entry" (
"id" INTEGER PRIMARY KEY
);
-- KanjiElement
-- One row per (entry, written form). Note that the written form is stored
-- in the column named "reading"; tables that reference this one must use
-- ("entryId", "reading") as the parent key.
-- The "news", "ichi", "spec" and "gai" columns are each limited to 1 or 2
-- (or NULL) by their CHECK constraints.
CREATE TABLE "JMdict_KanjiElement" (
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
"reading" TEXT NOT NULL,
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
"ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
"nf" INTEGER,
PRIMARY KEY ("entryId", "reading")
) WITHOUT ROWID;
-- Associates a kanji element with zero or more "JMdict_InfoKanji" codes.
CREATE TABLE "JMdict_KanjiElementInfo" (
"entryId" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"info" TEXT NOT NULL REFERENCES "JMdict_InfoKanji"("id"),
FOREIGN KEY ("entryId", "reading")
REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "reading", "info")
) WITHOUT ROWID;
-- ReadingElement
-- One row per (entry, reading). The priority columns carry the same CHECK
-- constraints as in "JMdict_KanjiElement".
CREATE TABLE "JMdict_ReadingElement" (
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
"reading" TEXT NOT NULL,
"readingDoesNotMatchKanji" BOOLEAN NOT NULL DEFAULT FALSE,
"news" INTEGER CHECK ("news" BETWEEN 1 AND 2),
"ichi" INTEGER CHECK ("ichi" BETWEEN 1 AND 2),
"spec" INTEGER CHECK ("spec" BETWEEN 1 AND 2),
"gai" INTEGER CHECK ("gai" BETWEEN 1 AND 2),
"nf" INTEGER,
PRIMARY KEY ("entryId", "reading")
) WITHOUT ROWID;
-- Zero or more restriction strings per reading element.
-- NOTE(review): "restriction" presumably names a kanji element of the same
-- entry, but no foreign key enforces that -- confirm against the parser.
CREATE TABLE "JMdict_ReadingElementRestriction" (
"entryId" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"restriction" TEXT NOT NULL,
FOREIGN KEY ("entryId", "reading")
REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "reading", "restriction")
) WITHOUT ROWID;
-- Associates a reading element with zero or more "JMdict_InfoReading" codes.
CREATE TABLE "JMdict_ReadingElementInfo" (
"entryId" INTEGER NOT NULL,
"reading" TEXT NOT NULL,
"info" TEXT NOT NULL REFERENCES "JMdict_InfoReading"("id"),
FOREIGN KEY ("entryId", "reading")
REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "reading", "info")
) WITHOUT ROWID;
-- Sense
-- Optimal solution here would be to have an id INTEGER AUTOINCREMENT,
-- and the entryId as a composite key, since the entryId is used below.
-- However, autoincrementing composite keys are not available in sqlite
-- As a result, "id" alone identifies a sense, and "entryId" is a plain
-- (nullable) reference back to the owning entry.
CREATE TABLE "JMdict_Sense" (
"id" INTEGER PRIMARY KEY AUTOINCREMENT,
"entryId" INTEGER REFERENCES "JMdict_Entry"("id")
);
-- Restricts a sense to specific kanji elements of its entry.
CREATE TABLE "JMdict_SenseRestrictedToKanji" (
  "entryId" INTEGER,
  "senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
  "kanji" TEXT,
  -- "JMdict_KanjiElement" stores the written form in its "reading" column
  -- and has no "kanji" column, so the parent key must name "reading".
  -- Referencing a non-existent column makes every insert fail with a
  -- "foreign key mismatch" error once foreign keys are enabled.
  FOREIGN KEY ("entryId", "kanji")
    REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
  PRIMARY KEY ("entryId", "senseId", "kanji")
);
-- Restricts a sense to specific reading elements of its entry.
CREATE TABLE "JMdict_SenseRestrictedToReading" (
"entryId" INTEGER,
"senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
"reading" TEXT,
FOREIGN KEY ("entryId", "reading") REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
PRIMARY KEY ("entryId", "senseId", "reading")
);
-- In order to add xrefs, the entry being referred to must already exist,
-- so xrefs should be added in a second pass over the dictionary file.
-- In this version of JMdict, xrefs can be ambiguous.
-- There have been rumours of a non-ambiguous version possibly arriving in
-- the future (https://www.edrdg.org/jmdict_edict_list/2019/msg00360.html),
-- but for the time being this needs to be modeled as a one-to-many
-- relationship. Both points also apply to "JMdict_SenseAntonym".
CREATE TABLE "JMdict_SenseSeeAlso" (
  "senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
  "xrefEntryId" INTEGER,
  "seeAlsoReading" TEXT,
  "seeAlsoKanji" TEXT,
  "seeAlsoSense" TEXT REFERENCES "JMdict_Sense"("id"),
  -- A comparison with "= NULL" always evaluates to NULL, which a CHECK
  -- constraint treats as passing, so the original constraint never fired.
  -- An xref may name a kanji form, a reading, or both, so the rule
  -- enforced here is that at least one of the two is present.
  CHECK ("seeAlsoReading" IS NOT NULL OR "seeAlsoKanji" IS NOT NULL),
  -- CHECK("seeAlsoSense" = NULL OR "seeAlsoSense")
  -- Check that if seeAlsoSense is present, it refers to a sense connected to xrefEntryId.
  -- "JMdict_KanjiElement" keeps the written form in its "reading" column;
  -- it has no "kanji" column to reference.
  FOREIGN KEY ("xrefEntryId", "seeAlsoKanji")
    REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
  FOREIGN KEY ("xrefEntryId", "seeAlsoReading")
    REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
  PRIMARY KEY ("senseId", "xrefEntryId", "seeAlsoReading", "seeAlsoKanji", "seeAlsoSense")
);
-- Antonym cross references of a sense; same shape as "JMdict_SenseSeeAlso".
CREATE TABLE "JMdict_SenseAntonym" (
  "senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
  "xrefEntryId" INTEGER,
  "antonymReading" TEXT,
  "antonymKanji" TEXT,
  "antonymSense" TEXT REFERENCES "JMdict_Sense"("id"),
  -- A comparison with "= NULL" always evaluates to NULL, which a CHECK
  -- constraint treats as passing, so the original constraint never fired.
  -- A cross reference may name a kanji form, a reading, or both, so the
  -- rule enforced here is that at least one of the two is present.
  CHECK ("antonymReading" IS NOT NULL OR "antonymKanji" IS NOT NULL),
  -- "JMdict_KanjiElement" keeps the written form in its "reading" column;
  -- it has no "kanji" column to reference.
  FOREIGN KEY ("xrefEntryId", "antonymKanji")
    REFERENCES "JMdict_KanjiElement"("entryId", "reading"),
  FOREIGN KEY ("xrefEntryId", "antonymReading")
    REFERENCES "JMdict_ReadingElement"("entryId", "reading"),
  PRIMARY KEY ("senseId", "xrefEntryId", "antonymReading", "antonymKanji", "antonymSense")
);
-- These cross references are going to be mostly accessed from a sense
-- This will speed up the join.
-- NOTE(review): "senseId" is also the leading column of both tables'
-- primary keys, so these indexes may duplicate the automatic primary-key
-- index -- confirm with EXPLAIN QUERY PLAN before relying on them.
CREATE INDEX "JMdict_SenseSeeAlso_bySenseId" ON "JMdict_SenseSeeAlso"("senseId");
CREATE INDEX "JMdict_SenseAntonym_bySenseId" ON "JMdict_SenseAntonym"("senseId");
-- Many-to-many link tables between a sense and the "JMdict_InfoPOS",
-- "JMdict_InfoField" and "JMdict_InfoMisc" lookup tables.
CREATE TABLE "JMdict_SensePOS" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"pos" TEXT NOT NULL REFERENCES "JMdict_InfoPOS"("id"),
PRIMARY KEY ("senseId", "pos")
) WITHOUT ROWID;
CREATE TABLE "JMdict_SenseField" (
"senseId" INTEGER NOT NULL,
"field" TEXT NOT NULL,
FOREIGN KEY ("senseId") REFERENCES "JMdict_Sense"("id"),
FOREIGN KEY ("field") REFERENCES "JMdict_InfoField"("id"),
PRIMARY KEY ("senseId", "field")
) WITHOUT ROWID;
CREATE TABLE "JMdict_SenseMisc" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"misc" TEXT NOT NULL REFERENCES "JMdict_InfoMisc"("id"),
PRIMARY KEY ("senseId", "misc")
) WITHOUT ROWID;
-- Source-language information for a sense.
CREATE TABLE "JMdict_SenseLanguageSource" (
  "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
  -- String literals take single quotes. A double-quoted "eng" is an
  -- identifier that SQLite only accepts as a string through a legacy
  -- fallback, which can be disabled at compile time or run time.
  "language" CHAR(3) NOT NULL DEFAULT 'eng',
  "phrase" TEXT,
  "fullyDescribesSense" BOOLEAN NOT NULL DEFAULT TRUE,
  "constructedFromSmallerWords" BOOLEAN NOT NULL DEFAULT FALSE,
  PRIMARY KEY ("senseId", "language", "phrase")
);
-- Many-to-many link table between a sense and "JMdict_InfoDialect".
CREATE TABLE "JMdict_SenseDialect" (
  "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
  -- The key column of "JMdict_InfoDialect" is "id"; that table has no
  -- "dialect" column to reference.
  "dialect" TEXT NOT NULL REFERENCES "JMdict_InfoDialect"("id"),
  PRIMARY KEY ("senseId", "dialect")
) WITHOUT ROWID;
-- In the documentation, it says that the glossary can contain
-- special prioritized entries, but I can't find a single one of those.
-- Neither can I find a glossary tag with g_gend data, so these parts
-- will be omitted.
CREATE TABLE "JMdict_SenseGlossary" (
  "senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
  "phrase" TEXT NOT NULL,
  -- String literals take single quotes. A double-quoted "eng" is an
  -- identifier that SQLite only accepts as a string through a legacy
  -- fallback, which can be disabled at compile time or run time.
  "language" CHAR(3) NOT NULL DEFAULT 'eng',
  "type" TEXT,
  PRIMARY KEY ("senseId", "language", "phrase")
) WITHOUT ROWID;
-- Free-text information strings attached to a sense; unlike the other
-- sense link tables, "info" does not reference a lookup table.
CREATE TABLE "JMdict_SenseInfo" (
"senseId" INTEGER NOT NULL REFERENCES "JMdict_Sense"("id"),
"info" TEXT NOT NULL,
PRIMARY KEY ("senseId", "info")
) WITHOUT ROWID;
-- There is not a single example sentence that doesn't come from
-- the Tanaka Corpus, so I will leave the type out for now.
CREATE TABLE "JMdict_ExampleSentence" (
  "id" INTEGER PRIMARY KEY,
  "senseId" INTEGER REFERENCES "JMdict_Sense"("id"),
  "word" TEXT NOT NULL,
  "source" TEXT NOT NULL,
  -- String literals take single quotes. A double-quoted "eng" is an
  -- identifier that SQLite only accepts as a string through a legacy
  -- fallback, which can be disabled at compile time or run time.
  "sourceLanguage" CHAR(3) NOT NULL DEFAULT 'eng',
  "japanese" TEXT NOT NULL
  -- "type" TEXT NOT NULL DEFAULT 'tat',
);
-- These tables are for optimizing searches.
-- In order to include results from both, the software should
-- first check if the searchword is convertible to kana, and then
-- potentially get results from both by doing a union between two
-- selects.
-- Each table maps a search key ("kana" or "english") to the ids of the
-- entries it belongs to.
CREATE TABLE "JMdict_EntryByKana" (
"kana" TEXT NOT NULL,
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
PRIMARY KEY ("kana", "entryId")
) WITHOUT ROWID;
-- NOTE(review): "kana" is already the leading primary-key column of this
-- WITHOUT ROWID table, so this index may be redundant -- confirm.
CREATE INDEX "JMdict_EntryByKana_byKana" ON "JMdict_EntryByKana"("kana");
CREATE TABLE "JMdict_EntryByEnglish" (
"english" TEXT NOT NULL,
"entryId" INTEGER NOT NULL REFERENCES "JMdict_Entry"("id"),
PRIMARY KEY ("english", "entryId")
) WITHOUT ROWID;
-- NOTE(review): same consideration as for "JMdict_EntryByKana_byKana".
CREATE INDEX "JMdict_EntryByEnglish_byEnglish" ON "JMdict_EntryByEnglish"("english");
--------------
-- RADKFILE --
--------------
-- One row per (kanji, radical) pair. The two indexes allow lookups in
-- either direction: "RADK" by radical and "KRAD" by kanji.
CREATE TABLE "RADKFILE" (
"kanji" CHAR(1) NOT NULL,
"radical" CHAR(1) NOT NULL,
PRIMARY KEY ("kanji", "radical")
) WITHOUT ROWID;
CREATE INDEX "RADK" ON "RADKFILE"("radical");
CREATE INDEX "KRAD" ON "RADKFILE"("kanji");
-- The distinct set of radicals that occur in "RADKFILE".
CREATE VIEW "RADKFILE_Radicals" AS
SELECT DISTINCT "radical" FROM "RADKFILE";
--------------
-- KANJIDIC --
--------------
-- One row per kanji; every other KANJIDIC table references "literal".
CREATE TABLE "KANJIDIC_Character" (
"literal" CHAR(1) NOT NULL PRIMARY KEY,
"grade" INTEGER CHECK ("grade" BETWEEN 1 AND 10),
"strokeCount" INTEGER NOT NULL,
"frequency" INTEGER,
"jlpt" INTEGER
) WITHOUT ROWID;
-- Zero or more radical names per kanji.
CREATE TABLE "KANJIDIC_RadicalName" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"name" TEXT NOT NULL,
PRIMARY KEY("kanji", "name")
) WITHOUT ROWID;
-- At most one codepoint per kanji and encoding type.
CREATE TABLE "KANJIDIC_Codepoint" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"type" VARCHAR(6) NOT NULL CHECK ("type" IN ('jis208', 'jis212', 'jis213', 'ucs')),
"codepoint" VARCHAR(7) NOT NULL,
PRIMARY KEY ("kanji", "type")
) WITHOUT ROWID;
CREATE INDEX "KANJIDIC_Codepoint_byCharacter" ON "KANJIDIC_Codepoint"("kanji");
-- At most one radical number per kanji and classification type. The upper
-- bound of "radical" depends on the type: 214 for 'classical', 212 otherwise.
CREATE TABLE "KANJIDIC_Radical" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"type" VARCHAR(9) NOT NULL CHECK ("type" IN ('classical', 'nelson_c')),
"radical" INTEGER NOT NULL CHECK ("radical" BETWEEN 1 AND IIF("type" = 'classical', 214, 212)),
PRIMARY KEY("kanji", "type")
) WITHOUT ROWID;
-- Additional stroke counts recorded for a kanji besides
-- "KANJIDIC_Character"."strokeCount".
CREATE TABLE "KANJIDIC_StrokeMiscount" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"strokeCount" INTEGER NOT NULL,
PRIMARY KEY("kanji", "strokeCount")
) WITHOUT ROWID;
-- Variant references for a kanji, keyed by the scheme named in "type".
CREATE TABLE "KANJIDIC_Variant" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"variant" TEXT NOT NULL,
"type" VARCHAR(8) NOT NULL CHECK (
"type" IN (
'jis208',
'jis212',
'jis213',
'deroo',
'njecd',
's_h',
'nelson_c',
'oneill',
'ucs'
)
),
PRIMARY KEY ("kanji", "type", "variant")
) WITHOUT ROWID;
-- Dictionary references are split over two backing tables: references of
-- type 'moro' carry extra "volume" and "page" columns and live in their own
-- table, all other types live here. The "KANJIDIC_DictionaryReference"
-- view below recombines them.
CREATE TABLE "_KANJIDIC_DictionaryReference_Part1" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"ref" VARCHAR(9) NOT NULL,
"type" VARCHAR(16) NOT NULL CHECK(
"type" IN (
'nelson_c',
'nelson_n',
'halpern_njecd',
'halpern_kkd',
'halpern_kkld',
'halpern_kkld_2ed',
'heisig',
'heisig6',
'gakken',
'oneill_names',
'oneill_kk',
'henshall',
'sh_kk',
'sh_kk2',
'sakade',
'jf_cards',
'henshall3',
'tutt_cards',
'crowley',
'kanji_in_context',
'busy_people',
'kodansha_compact',
'maniette'
)
),
PRIMARY KEY("kanji", "type")
) WITHOUT ROWID;
-- At most one 'moro' reference per kanji ("kanji" is the primary key).
CREATE TABLE "_KANJIDIC_DictionaryReference_Moro" (
"kanji" CHAR(1) NOT NULL PRIMARY KEY REFERENCES "KANJIDIC_Character"("literal"),
"ref" VARCHAR(7) NOT NULL,
"volume" INTEGER,
"page" INTEGER
) WITHOUT ROWID;
CREATE INDEX "KANJIDIC_DictionaryReference_byPart1" ON "_KANJIDIC_DictionaryReference_Part1"("kanji", "ref", "type");
CREATE INDEX "KANJIDIC_DictionaryReference_byMoro" ON "_KANJIDIC_DictionaryReference_Moro"("kanji", "ref", "volume", "page");
-- Unified view over both backing tables. Rows from Part1 get NULL
-- "volume"/"page"; rows from Moro get the constant type 'moro'.
CREATE VIEW "KANJIDIC_DictionaryReference" AS
SELECT "kanji", "ref", "type", NULL AS "volume", NULL AS "page" FROM "_KANJIDIC_DictionaryReference_Part1"
UNION
SELECT "kanji", "ref", 'moro' AS "type", "volume", "page" FROM "_KANJIDIC_DictionaryReference_Moro";
-- Query codes for a kanji, keyed by the scheme named in "type".
-- NOTE(review): "SKIPMisclassification" is presumably only meaningful for
-- SKIP-related rows; no constraint enforces that -- confirm with the parser.
CREATE TABLE "KANJIDIC_QueryCode" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"code" VARCHAR(7) NOT NULL,
"type" VARCHAR(11) NOT NULL CHECK ("type" IN ('skip', 'sh_desc', 'four_corner', 'deroo', 'misclass')),
"SKIPMisclassification" VARCHAR(15),
PRIMARY KEY ("kanji", "type", "code")
) WITHOUT ROWID;
-- Readings of the types 'korean_h', 'korean_r' and 'pinyin'; kun and on
-- readings are stored in "KANJIDIC_Kunyomi" and "KANJIDIC_Onyomi".
CREATE TABLE "KANJIDIC_Reading" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"type" VARCHAR(8) NOT NULL CHECK ("type" IN ('korean_h', 'korean_r', 'pinyin')),
"reading" TEXT NOT NULL,
PRIMARY KEY ("kanji", "type", "reading")
) WITHOUT ROWID;
CREATE INDEX "KANJIDIC_Reading_byReading" ON "KANJIDIC_Reading"("reading");
-- Kun readings of a kanji, with an index for reverse lookup by reading.
CREATE TABLE "KANJIDIC_Kunyomi" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"yomi" TEXT NOT NULL,
"isJouyou" BOOLEAN,
PRIMARY KEY ("kanji", "yomi")
) WITHOUT ROWID;
CREATE INDEX "KANJIDIC_Kunyomi_byYomi" ON "KANJIDIC_Kunyomi"("yomi");
-- On readings of a kanji, optionally classified by "type", with an index
-- for reverse lookup by reading.
CREATE TABLE "KANJIDIC_Onyomi" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"yomi" TEXT NOT NULL,
"type" VARCHAR(7) CHECK ("type" IN ('kan', 'go', 'tou', 'kan''you')),
"isJouyou" BOOLEAN,
PRIMARY KEY ("kanji", "yomi")
) WITHOUT ROWID;
CREATE INDEX "KANJIDIC_Onyomi_byYomi" ON "KANJIDIC_Onyomi"("yomi");
-- Meanings of a kanji, per language.
CREATE TABLE "KANJIDIC_Meaning" (
  "kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
  -- String literals take single quotes. A double-quoted "eng" is an
  -- identifier that SQLite only accepts as a string through a legacy
  -- fallback, which can be disabled at compile time or run time.
  "language" CHAR(3) NOT NULL DEFAULT 'eng',
  "meaning" TEXT NOT NULL,
  PRIMARY KEY ("kanji", "language", "meaning")
) WITHOUT ROWID;
-- Reverse lookup of kanji by meaning text.
CREATE INDEX "KANJIDIC_Meaning_byMeaning" ON "KANJIDIC_Meaning"("meaning");
-- Nanori entries of a kanji, with an index for reverse lookup.
CREATE TABLE "KANJIDIC_Nanori" (
"kanji" CHAR(1) NOT NULL REFERENCES "KANJIDIC_Character"("literal"),
"nanori" TEXT NOT NULL,
PRIMARY KEY ("kanji", "nanori")
) WITHOUT ROWID;
CREATE INDEX "KANJIDIC_Nanori_byNanori" ON "KANJIDIC_Nanori"("nanori");
-------------------------
-- Interdict relations --
-------------------------
-- Radk - kanjidic
-- kanjireading -> filter kanji regex - kanjidic
-- index kanji search by romaji
-- index kanji search by hiragana
-- index word search by romaji
-- index word search by hiragana

View File

@ -0,0 +1,251 @@
INSERT INTO "JMdict_InfoDialect"("id", "description") VALUES
('bra', 'Brazilian'),
('hob', 'Hokkaido-ben'),
('ksb', 'Kansai-ben'),
('ktb', 'Kantou-ben'),
('kyb', 'Kyoto-ben'),
('kyu', 'Kyuushuu-ben'),
('nab', 'Nagano-ben'),
('osb', 'Osaka-ben'),
('rkb', 'Ryuukyuu-ben'),
('thb', 'Touhoku-ben'),
('tsb', 'Tosa-ben'),
('tsug', 'Tsugaru-ben');
INSERT INTO "JMdict_InfoField"("id", "description") VALUES
('agric', 'agriculture'),
('anat', 'anatomy'),
('archeol', 'archeology'),
('archit', 'architecture'),
('art', 'art, aesthetics'),
('astron', 'astronomy'),
('audvid', 'audiovisual'),
('aviat', 'aviation'),
('baseb', 'baseball'),
('biochem', 'biochemistry'),
('biol', 'biology'),
('bot', 'botany'),
('Buddh', 'Buddhism'),
('bus', 'business'),
('chem', 'chemistry'),
('Christn', 'Christianity'),
('cloth', 'clothing'),
('comp', 'computing'),
('cryst', 'crystallography'),
('ecol', 'ecology'),
('econ', 'economics'),
('elec', 'electricity, elec. eng.'),
('electr', 'electronics'),
('embryo', 'embryology'),
('engr', 'engineering'),
('ent', 'entomology'),
('finc', 'finance'),
('fish', 'fishing'),
('food', 'food, cooking'),
('gardn', 'gardening, horticulture'),
('genet', 'genetics'),
('geogr', 'geography'),
('geol', 'geology'),
('geom', 'geometry'),
('go', 'go (game)'),
('golf', 'golf'),
('gramm', 'grammar'),
('grmyth', 'Greek mythology'),
('hanaf', 'hanafuda'),
('horse', 'horse racing'),
('law', 'law'),
('ling', 'linguistics'),
('logic', 'logic'),
('MA', 'martial arts'),
('mahj', 'mahjong'),
('math', 'mathematics'),
('mech', 'mechanical engineering'),
('med', 'medicine'),
('met', 'meteorology'),
('mil', 'military'),
('music', 'music'),
('ornith', 'ornithology'),
('paleo', 'paleontology'),
('pathol', 'pathology'),
('pharm', 'pharmacy'),
('phil', 'philosophy'),
('photo', 'photography'),
('physics', 'physics'),
('physiol', 'physiology'),
('print', 'printing'),
('psy', 'psychiatry'),
('psych', 'psychology'),
('rail', 'railway'),
('Shinto', 'Shinto'),
('shogi', 'shogi'),
('sports', 'sports'),
('stat', 'statistics'),
('sumo', 'sumo'),
('telec', 'telecommunications'),
('tradem', 'trademark'),
('vidg', 'video games'),
('zool', 'zoology');
INSERT INTO "JMdict_InfoKanji"("id", "description") VALUES
('ateji', 'ateji (phonetic) reading'),
('ik', 'word containing irregular kana usage'),
('iK', 'word containing irregular kanji usage'),
('io', 'irregular okurigana usage'),
('oK', 'word containing out-dated kanji or kanji usage'),
('rK', 'rarely-used kanji form');
INSERT INTO "JMdict_InfoMisc"("id", "description") VALUES
('abbr', 'abbreviation'),
('arch', 'archaism'),
('char', 'character'),
('chn', 'children''s language'),
('col', 'colloquialism'),
('company', 'company name'),
('creat', 'creature'),
('dated', 'dated term'),
('dei', 'deity'),
('derog', 'derogatory'),
('doc', 'document'),
('ev', 'event'),
('fam', 'familiar language'),
('fem', 'female term or language'),
('fict', 'fiction'),
('form', 'formal or literary term'),
('given', 'given name or forename, gender not specified'),
('group', 'group'),
('hist', 'historical term'),
('hon', 'honorific or respectful (sonkeigo) language'),
('hum', 'humble (kenjougo) language'),
('id', 'idiomatic expression'),
('joc', 'jocular, humorous term'),
('leg', 'legend'),
('m-sl', 'manga slang'),
('male', 'male term or language'),
('myth', 'mythology'),
('net-sl', 'Internet slang'),
('obj', 'object'),
('obs', 'obsolete term'),
('obsc', 'obscure term'),
('on-mim', 'onomatopoeic or mimetic word'),
('organization', 'organization name'),
('oth', 'other'),
('person', 'full name of a particular person'),
('place', 'place name'),
('poet', 'poetical term'),
('pol', 'polite (teineigo) language'),
('product', 'product name'),
('proverb', 'proverb'),
('quote', 'quotation'),
('rare', 'rare'),
('relig', 'religion'),
('sens', 'sensitive'),
('serv', 'service'),
('sl', 'slang'),
('station', 'railway station'),
('surname', 'family or surname'),
('uk', 'word usually written using kana alone'),
('unclass', 'unclassified name'),
('vulg', 'vulgar expression or word'),
('work', 'work of art, literature, music, etc. name'),
('X', 'rude or X-rated term (not displayed in educational software)'),
('yoji', 'yojijukugo');
INSERT INTO "JMdict_InfoPOS"("id", "description") VALUES
('adj-f', 'noun or verb acting prenominally'),
('adj-i', 'adjective (keiyoushi)'),
('adj-ix', 'adjective (keiyoushi) - yoi/ii class'),
('adj-kari', '''kari'' adjective (archaic)'),
('adj-ku', '''ku'' adjective (archaic)'),
('adj-na', 'adjectival nouns or quasi-adjectives (keiyodoshi)'),
('adj-nari', 'archaic/formal form of na-adjective'),
('adj-no', 'nouns which may take the genitive case particle ''no'''),
('adj-pn', 'pre-noun adjectival (rentaishi)'),
('adj-shiku', '''shiku'' adjective (archaic)'),
('adj-t', '''taru'' adjective'),
('adv', 'adverb (fukushi)'),
('adv-to', 'adverb taking the ''to'' particle'),
('aux', 'auxiliary'),
('aux-adj', 'auxiliary adjective'),
('aux-v', 'auxiliary verb'),
('conj', 'conjunction'),
('cop', 'copula'),
('ctr', 'counter'),
('exp', 'expressions (phrases, clauses, etc.)'),
('int', 'interjection (kandoushi)'),
('n', 'noun (common) (futsuumeishi)'),
('n-adv', 'adverbial noun (fukushitekimeishi)'),
('n-pr', 'proper noun'),
('n-pref', 'noun, used as a prefix'),
('n-suf', 'noun, used as a suffix'),
('n-t', 'noun (temporal) (jisoumeishi)'),
('num', 'numeric'),
('pn', 'pronoun'),
('pref', 'prefix'),
('prt', 'particle'),
('suf', 'suffix'),
('unc', 'unclassified'),
('v-unspec', 'verb unspecified'),
('v1', 'Ichidan verb'),
('v1-s', 'Ichidan verb - kureru special class'),
('v2a-s', 'Nidan verb with ''u'' ending (archaic)'),
('v2b-k', 'Nidan verb (upper class) with ''bu'' ending (archaic)'),
('v2b-s', 'Nidan verb (lower class) with ''bu'' ending (archaic)'),
('v2d-k', 'Nidan verb (upper class) with ''dzu'' ending (archaic)'),
('v2d-s', 'Nidan verb (lower class) with ''dzu'' ending (archaic)'),
('v2g-k', 'Nidan verb (upper class) with ''gu'' ending (archaic)'),
('v2g-s', 'Nidan verb (lower class) with ''gu'' ending (archaic)'),
('v2h-k', 'Nidan verb (upper class) with ''hu/fu'' ending (archaic)'),
('v2h-s', 'Nidan verb (lower class) with ''hu/fu'' ending (archaic)'),
('v2k-k', 'Nidan verb (upper class) with ''ku'' ending (archaic)'),
('v2k-s', 'Nidan verb (lower class) with ''ku'' ending (archaic)'),
('v2m-k', 'Nidan verb (upper class) with ''mu'' ending (archaic)'),
('v2m-s', 'Nidan verb (lower class) with ''mu'' ending (archaic)'),
('v2n-s', 'Nidan verb (lower class) with ''nu'' ending (archaic)'),
('v2r-k', 'Nidan verb (upper class) with ''ru'' ending (archaic)'),
('v2r-s', 'Nidan verb (lower class) with ''ru'' ending (archaic)'),
('v2s-s', 'Nidan verb (lower class) with ''su'' ending (archaic)'),
('v2t-k', 'Nidan verb (upper class) with ''tsu'' ending (archaic)'),
('v2t-s', 'Nidan verb (lower class) with ''tsu'' ending (archaic)'),
('v2w-s', 'Nidan verb (lower class) with ''u'' ending and ''we'' conjugation (archaic)'),
('v2y-k', 'Nidan verb (upper class) with ''yu'' ending (archaic)'),
('v2y-s', 'Nidan verb (lower class) with ''yu'' ending (archaic)'),
('v2z-s', 'Nidan verb (lower class) with ''zu'' ending (archaic)'),
('v4b', 'Yodan verb with ''bu'' ending (archaic)'),
('v4g', 'Yodan verb with ''gu'' ending (archaic)'),
('v4h', 'Yodan verb with ''hu/fu'' ending (archaic)'),
('v4k', 'Yodan verb with ''ku'' ending (archaic)'),
('v4m', 'Yodan verb with ''mu'' ending (archaic)'),
('v4n', 'Yodan verb with ''nu'' ending (archaic)'),
('v4r', 'Yodan verb with ''ru'' ending (archaic)'),
('v4s', 'Yodan verb with ''su'' ending (archaic)'),
('v4t', 'Yodan verb with ''tsu'' ending (archaic)'),
('v5aru', 'Godan verb - -aru special class'),
('v5b', 'Godan verb with ''bu'' ending'),
('v5g', 'Godan verb with ''gu'' ending'),
('v5k', 'Godan verb with ''ku'' ending'),
('v5k-s', 'Godan verb - Iku/Yuku special class'),
('v5m', 'Godan verb with ''mu'' ending'),
('v5n', 'Godan verb with ''nu'' ending'),
('v5r', 'Godan verb with ''ru'' ending'),
('v5r-i', 'Godan verb with ''ru'' ending (irregular verb)'),
('v5s', 'Godan verb with ''su'' ending'),
('v5t', 'Godan verb with ''tsu'' ending'),
('v5u', 'Godan verb with ''u'' ending'),
('v5u-s', 'Godan verb with ''u'' ending (special class)'),
('v5uru', 'Godan verb - Uru old class verb (old form of Eru)'),
('vi', 'intransitive verb'),
('vk', 'Kuru verb - special class'),
('vn', 'irregular nu verb'),
('vr', 'irregular ru verb, plain form ends with -ri'),
('vs', 'noun or participle which takes the aux. verb suru'),
('vs-c', 'su verb - precursor to the modern suru'),
('vs-i', 'suru verb - included'),
('vs-s', 'suru verb - special class'),
('vt', 'transitive verb'),
('vz', 'Ichidan verb - zuru verb (alternative form of -jiru verbs)');
INSERT INTO "JMdict_InfoReading"("id", "description") VALUES
('gikun', 'gikun (meaning as reading) or jukujikun (special kanji reading)'),
('ik', 'word containing irregular kana usage'),
('ok', 'out-dated or obsolete kana usage'),
('uK', 'word usually written using kanji alone');

99
pub2nix.lock Normal file
View File

@ -0,0 +1,99 @@
packages:
collection:
dependency: direct main
description:
name: collection
url: https://pub.dartlang.org
source: hosted
version: 1.16.0
sha256: 0nx7mbxwxw5z4mdjr9z8hg6g8kgy3cv5pv5ax0j1i9kl36brk5rg
ffi:
dependency: transitive
description:
name: ffi
url: https://pub.dartlang.org
source: hosted
version: 1.1.2
sha256: 0w0yd43y1fsfzihd4j6diymg90bgvyi2zqyb3vf0k6g8hk8x1yr6
js:
dependency: transitive
description:
name: js
url: https://pub.dartlang.org
source: hosted
version: 0.6.4
sha256: 01knzh9890ygxpy59rsh77h2ilh69wyl83idvrcvwzk8fknjldkb
lints:
dependency: direct dev
description:
name: lints
url: https://pub.dartlang.org
source: hosted
version: 1.0.1
sha256: 1xyn9xpzwfw1f0mp03pyvspcphkinhzawkgp5lwmi7p15mv1vgz2
meta:
dependency: transitive
description:
name: meta
url: https://pub.dartlang.org
source: hosted
version: 1.7.0
sha256: 1z8sx23l9jn2ickq3z63pqpb6k9y6gbnnvj9200c6v7m3cvd7jbv
path:
dependency: transitive
description:
name: path
url: https://pub.dartlang.org
source: hosted
version: 1.8.1
sha256: 0wg5da3zykfbala8mvcl7r7blgi5qjb838qhw30brgj3ani2hdph
petitparser:
dependency: transitive
description:
name: petitparser
url: https://pub.dartlang.org
source: hosted
version: 5.0.0
sha256: 01rcmvk1znjykph6znkd3skvfn61lj54km4xw6vwa5iwwg84p5ph
sqflite_common:
dependency: transitive
description:
name: sqflite_common
url: https://pub.dartlang.org
source: hosted
version: 2.2.1+1
sha256: 1i3fmvgj0f1ynf03rd1x9r0bmxly333jyi392ghh1ahm0lnj1kzq
sqflite_common_ffi:
dependency: direct main
description:
name: sqflite_common_ffi
url: https://pub.dartlang.org
source: hosted
version: 2.1.1
sha256: 0v5xq9xpg63zidf8as17zh0pbhfzf9k00a2wn183jz9i5rmh6207
sqlite3:
dependency: transitive
description:
name: sqlite3
url: https://pub.dartlang.org
source: hosted
version: 1.7.0
sha256: 1x56g99nw3jqvx1ysggpmmvb9gap63wdxk0pjawzv47xxm058rhm
synchronized:
dependency: transitive
description:
name: synchronized
url: https://pub.dartlang.org
source: hosted
version: 3.0.0+2
sha256: 1j6108cq1hbcqpwhk9sah8q3gcidd7222bzhha2nk9syxhzqy82i
xml:
dependency: direct main
description:
name: xml
url: https://pub.dartlang.org
source: hosted
version: 6.0.1
sha256: 158srisyld2zwhchcz264rf8qnby54xan4db14hf7lda7bx8ikbh
sdks:
dart: '>=2.16.1 <3.0.0'

89
pubspec.lock Normal file
View File

@ -0,0 +1,89 @@
# Generated by pub
# See https://dart.dev/tools/pub/glossary#lockfile
packages:
collection:
dependency: "direct main"
description:
name: collection
url: "https://pub.dartlang.org"
source: hosted
version: "1.16.0"
ffi:
dependency: transitive
description:
name: ffi
url: "https://pub.dartlang.org"
source: hosted
version: "1.1.2"
js:
dependency: transitive
description:
name: js
url: "https://pub.dartlang.org"
source: hosted
version: "0.6.4"
lints:
dependency: "direct dev"
description:
name: lints
url: "https://pub.dartlang.org"
source: hosted
version: "1.0.1"
meta:
dependency: transitive
description:
name: meta
url: "https://pub.dartlang.org"
source: hosted
version: "1.7.0"
path:
dependency: transitive
description:
name: path
url: "https://pub.dartlang.org"
source: hosted
version: "1.8.1"
petitparser:
dependency: transitive
description:
name: petitparser
url: "https://pub.dartlang.org"
source: hosted
version: "5.0.0"
sqflite_common:
dependency: transitive
description:
name: sqflite_common
url: "https://pub.dartlang.org"
source: hosted
version: "2.2.1+1"
sqflite_common_ffi:
dependency: "direct main"
description:
name: sqflite_common_ffi
url: "https://pub.dartlang.org"
source: hosted
version: "2.1.1"
sqlite3:
dependency: transitive
description:
name: sqlite3
url: "https://pub.dartlang.org"
source: hosted
version: "1.7.0"
synchronized:
dependency: transitive
description:
name: synchronized
url: "https://pub.dartlang.org"
source: hosted
version: "3.0.0+2"
xml:
dependency: "direct main"
description:
name: xml
url: "https://pub.dartlang.org"
source: hosted
version: "6.0.1"
sdks:
dart: ">=2.16.1 <3.0.0"

21
pubspec.yaml Normal file
View File

@ -0,0 +1,21 @@
name: ja_db
description: A SQLite database containing open-source Japanese language translation data
version: 1.0.0
homepage: https://git.nani.wtf/h7x4/ja_db
environment:
sdk: '>=2.16.1 <3.0.0'
dependencies:
collection: ^1.16.0
sqflite_common_ffi: ^2.1.1
xml: ^6.0.1
dev_dependencies:
lints: ^1.0.0
executables:
ja_db: ja_db
platforms:
linux: