# Source listing: TDT4310-project-sorted-japa.../flake.nix (83 lines, 2.3 KiB, Nix)

{
# Flake inputs: the nixpkgs branch used to build everything, plus three raw
# data downloads. The downloads set `flake = false`, so they are fetched as
# plain sources instead of being evaluated as flakes.
inputs = {
  # Indirect (registry) reference to the nixos-22.11 branch of nixpkgs.
  nixpkgs.url = "nixpkgs/nixos-22.11";

  # Gzip-compressed JMdict dictionary; decompressed by the `jmdict` package.
  # NOTE(review): this is fetched over plain HTTP — confirm whether an HTTPS
  # mirror is available before changing the URL.
  JMdictSrc.url = "http://ftp.edrdg.org/pub/Nihongo/JMdict_e.gz";
  JMdictSrc.flake = false;

  # Tatoeba exports. Both are passed to `outputs` but are not referenced by
  # any package or shell defined in this file.
  tatoeba_jpn_indices.url = "https://downloads.tatoeba.org/exports/jpn_indices.tar.bz2";
  tatoeba_jpn_indices.flake = false;

  tatoeba_eng_sentences.url = "https://downloads.tatoeba.org/exports/per_language/eng/eng_sentences.tsv.bz2";
  tatoeba_eng_sentences.flake = false;
};
# Flake outputs: dictionary/tokenizer packages and a Python development shell.
#
# The arguments are the resolved flake inputs. `tatoeba_jpn_indices` and
# `tatoeba_eng_sentences` are accepted here but not referenced by any output.
# Every output is defined for the single hard-coded `system` below.
outputs = { self, nixpkgs, JMdictSrc, tatoeba_jpn_indices, tatoeba_eng_sentences }: let
  system = "x86_64-linux";
  pkgs = nixpkgs.legacyPackages.${system};
  inherit (pkgs) lib;
in {
  packages.${system} = {
    # The whole Python package set is handed to callPackage as the explicit
    # argument set for ./nix/unidic.nix.
    # NOTE(review): presumably unidic.nix is a Python package expression, in
    # which case `pkgs.python3Packages.callPackage ./nix/unidic.nix { }` would
    # be the conventional spelling — confirm against that file.
    unidic = pkgs.callPackage ./nix/unidic.nix pkgs.python3Packages;

    mecab-unidic = pkgs.callPackage ./nix/mecab-unidic.nix {
      # nixpkgs' mecab base definition, imported straight from the nixpkgs
      # source tree and given only `fetchurl`.
      mecab-base = import "${nixpkgs}/pkgs/tools/text/mecab/base.nix" { inherit (pkgs) fetchurl; };
      inherit (self.packages.${system}) unidic;
    };

    # Decompresses the JMdict download and re-indents the XML in place;
    # the result is installed as $out/JMdict.xml.
    jmdict = pkgs.stdenvNoCC.mkDerivation {
      name = "JMdict";
      src = JMdictSrc;
      # The source is a single gzip file, not an archive, so the default
      # unpack phase is skipped and buildPhase decompresses it directly.
      dontUnpack = true;
      nativeBuildInputs = [ pkgs.xmlformat ];

      # Custom phases call runHook so that pre/post hooks added through
      # overrideAttrs are still executed.
      buildPhase = ''
        runHook preBuild

        gzip -dc "$src" > JMdict.xml
        xmlformat -i JMdict.xml

        runHook postBuild
      '';

      installPhase = ''
        runHook preInstall

        mkdir -p "$out"
        cp JMdict.xml "$out"

        runHook postInstall
      '';

      meta = {
        description = "A Japanese-Multilingual Dictionary providing lexical data for japanese words";
        homepage = "https://www.edrdg.org/jmdict/j_jmdict.html";
        license = {
          shortName = "EDRDG";
          fullName = "Electronic Dictionary Research and Development Group General Dictionary Licence";
          url = "http://www.csse.monash.edu.au/~jwb/edrdg/licence.html";
        };
        maintainers = [ "h7x4 <h7x4@nani.wtf>" ];
        platforms = lib.platforms.all;
      };
    };
  };

  # Development shell: the local unidic package plus the Python interpreter
  # and libraries, all taken from pkgs.python3Packages.
  devShells.${system}.default = pkgs.mkShell {
    packages = [
      self.packages.${system}.unidic
    ] ++ (with pkgs.python3Packages; [
      # flask
      # mecab-python3 rebuilt against the unidic-enabled mecab defined above.
      (mecab-python3.override { mecab = self.packages.${system}.mecab-unidic; })
      nltk
      pandas
      python
      requests
      scikit-learn
      spacy
      sqlalchemy
      # Resolved from python3Packages (the Python module), not pkgs.wget.
      wget
    ]);
  };
};
}