Created
May 10, 2020 09:51
-
-
Save k0f1sh/0e1cac30df52301cae08ac4aa1708fd0 to your computer and use it in GitHub Desktop.
第1章: 準備運動
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;;; https://nlp100.github.io/ja/ch01.html
;; helper
(defn get-words
  "Splits `s` on periods, commas and spaces and returns a lazy sequence of
  the non-empty pieces, in their original order."
  [s]
  (let [pieces (clojure.string/split s #"[\., ]")]
    ;; Adjacent delimiters (e.g. \", \") produce empty strings; discard them.
    (remove empty? pieces)))
(defn make-ngram
  "Returns a lazy sequence of every run of `n` consecutive items of `coll`,
  i.e. a sliding window that advances one item at a time. Each n-gram is
  itself a sequence of items.

  The result is empty when `coll` holds fewer than `n` items, and also when
  `n` is zero or negative."
  [n coll]
  (if (pos? n)
    ;; `partition` with a step of 1 yields exactly the full-size windows and
    ;; is lazy, so long inputs neither grow the call stack nor trigger a
    ;; `count` of the remaining input at every position.
    (partition n 1 coll)
    ()))
(defn bi-gram
  "Returns the 2-grams of `coll` by calling `make-ngram` with n fixed at 2."
  [coll]
  (make-ngram 2 coll))
(defn bi-gram-string
  "Returns the character bi-grams of `s` as a sequence of strings, computed
  after every space character has been deleted from `s`."
  [s]
  (let [compact (clojure.string/replace s #" " "")
        pairs   (bi-gram compact)]
    ;; Each bi-gram arrives as a sequence of characters; join it into a string.
    (map (partial apply str) pairs)))
;; 01: the characters of "stressed" in reverse order, as a string
(->> "stressed"
     reverse
     (apply str))
;; 02: keep every second character, starting with the first one
(->> "パタトクカシーー"
     (take-nth 2)
     (apply str))
;; 03: the length of each word of the sentence, in order of appearance
(->> "Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics."
     get-words
     (map count))
;; 04: map from a word prefix to the word's 1-based position in the
;; sentence. Words at the listed positions contribute their first character;
;; every other word contributes its first two characters.
(let [sentence "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can."
      single-char-positions #{1 5 6 7 8 9 15 16 19}
      prefix-entry (fn [idx word]
                     (let [position (inc idx)
                           width    (if (contains? single-char-positions position) 1 2)]
                       [(apply str (take width word)) position]))]
  (into {} (map-indexed prefix-entry (get-words sentence))))
;; 05
;; word bi-gram: pairs of adjacent words
(-> "I am an NLPer"
    get-words
    bi-gram)
;; character bi-gram: pairs of adjacent characters, with spaces removed first.
;; `bi-gram-string` already performs exactly this space removal, bi-gram
;; extraction and joining, so call it instead of repeating its body.
(bi-gram-string "I am an NLPer")
;; 06: set operations on the character bi-grams of two words
;; The namespaces used below are referenced by fully-qualified name, which
;; only works once they have been loaded; load them explicitly so this form
;; does not depend on some other code or tool having required them first.
(require 'clojure.set 'clojure.pprint)

(let [s1 "paraparaparadise"
      s2 "paragraph"
      x (into #{} (bi-gram-string s1))
      y (into #{} (bi-gram-string s2))
      union (clojure.set/union x y)               ; bi-grams in x or y
      intersection (clojure.set/intersection x y) ; bi-grams in both x and y
      difference (clojure.set/difference x y)]    ; bi-grams in x but not in y
  (clojure.pprint/pprint union)
  (clojure.pprint/pprint intersection)
  (clojure.pprint/pprint difference)
  ;; Is the bi-gram "se" a member of each set?
  (clojure.pprint/pprint (contains? x "se"))
  (clojure.pprint/pprint (contains? y "se")))
;; 07
(defn hoge
  "Returns the string formed by concatenating `x`, \"時の\", `y`, \"は\" and
  `z`, in that order. Non-string arguments are rendered as `str` renders them."
  [x y z]
  (apply str [x "時の" y "は" z]))

(println (hoge 12 "気温" 22.4))
;; 08
(defn cipher
  "Returns `s` with every English lowercase letter (a-z) replaced by the
  character whose code is 219 minus the letter's code, which mirrors the
  alphabet (a<->z, b<->y, ...). All other characters are left unchanged, so
  applying `cipher` twice returns the original string."
  [s]
  (letfn [(mirror [c]
            ;; Test the a-z range explicitly: non-ASCII lowercase letters must
            ;; pass through untouched, because 219 minus their code is either
            ;; negative (not a valid char) or an unrelated character.
            (if (<= (int \a) (int c) (int \z))
              (char (- 219 (int c)))
              c))]
    (apply str (map mirror s))))
;; Encoding twice should give back the input; use the complete a-z alphabet
;; so that every lowercase letter is exercised.
(cipher (cipher "abcdefghijklmnopqrstuvwxyz"))
;; 09: for each space-separated word longer than four characters, keep the
;; first and last characters in place and shuffle the characters in between;
;; shorter words are returned as they are.
(let [sentence "I couldn't believe that I could actually understand what I was reading : the phenomenal power of the human mind ."
      scramble (fn [word]
                 (if (<= (count word) 4)
                   word
                   (let [middle (->> word (drop 1) drop-last shuffle)]
                     (str (first word)
                          (apply str middle)
                          (last word)))))]
  (map scramble (clojure.string/split sentence #" ")))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment