|
11 | 11 | [clojure.java.io :as io]
|
12 | 12 | [clojure.set]
|
13 | 13 | [clojure.string :as string]
|
14 |
| - [datomic.codeq.util :refer [index->id-fn tempid?]] |
15 | 14 | [datomic.codeq.analyzer :as az]
|
16 | 15 | [datomic.codeq.git :as git]
|
17 | 16 | [datomic.codeq.analyzers.clj])
|
| 17 | + (:use datomic.codeq.util) |
18 | 18 | (:import java.util.Date
|
19 |
| - org.eclipse.jgit.lib.Repository |
20 |
| - org.eclipse.jgit.revwalk.RevCommit) |
| 19 | + org.eclipse.jgit.lib.Repository) |
21 | 20 | (:gen-class))
|
22 | 21 |
|
23 | 22 | (set! *warn-on-reflection* true)
|
|
172 | 171 | :db/valueType :db.type/ref
|
173 | 172 | :db/cardinality :db.cardinality/one
|
174 | 173 | :db/doc "Git object (tree/blob) in a tree node"
|
| 174 | + :db/index true |
175 | 175 | :db.install/_attribute :db.part/db}
|
176 | 176 |
|
177 | 177 | {:db/id #db/id[:db.part/db]
|
|
259 | 259 | @(d/transact conn schema)))
|
260 | 260 |
|
261 | 261 |
|
| 262 | +(defn authors-tx-data |
| 263 | + [db {:keys [author committer]}] |
| 264 | + (let [tempid? map? |
| 265 | + email->id (index->id-fn db :email/address) |
| 266 | + authorid (email->id author) |
| 267 | + committerid (email->id committer) |
| 268 | + tx-data (cond-> [] |
| 269 | + ;;record author's email if new |
| 270 | + (tempid? authorid) |
| 271 | + (conj [:db/add authorid :email/address author]) |
| 272 | + ;;record committer's email if new and is |
| 273 | + ;;distinct from the author's email |
| 274 | + (and (not= committer author) (tempid? committerid)) |
| 275 | + (conj [:db/add committerid :email/address committer]))] |
| 276 | + [authorid committerid tx-data])) |
| 277 | + |
| 278 | + |
| 279 | +(defn commit-node-tx-data |
| 280 | + [db repoid root-nodeid {:keys [sha msg parents authored committed] :as commit}] |
| 281 | + (let [commitid (d/tempid :db.part/user) |
| 282 | + [authorid committerid author-tx-data] (authors-tx-data db commit)] |
| 283 | + (into author-tx-data |
| 284 | + [[:db/add repoid :repo/commits commitid] |
| 285 | + {:db/id (d/tempid :db.part/tx) |
| 286 | + :tx/commit commitid |
| 287 | + :tx/op :import} |
| 288 | + (cond-> {:db/id commitid |
| 289 | + :git/type :commit |
| 290 | + :commit/tree root-nodeid |
| 291 | + :git/sha sha |
| 292 | + :commit/author authorid |
| 293 | + :commit/authoredAt authored |
| 294 | + :commit/committer committerid |
| 295 | + :commit/committedAt committed} |
| 296 | + msg |
| 297 | + (assoc :commit/message msg) |
| 298 | + parents |
| 299 | + (assoc :commit/parents |
| 300 | + (mapv (fn [p] |
| 301 | + (if-let [id (index-get-id db :git/sha p)] |
| 302 | + id |
| 303 | + (throw (ex-info "Parent not previously imported" |
| 304 | + {:sha sha :parent p})))) |
| 305 | + parents)))]))) |
| 306 | + |
| 307 | + |
262 | 308 | (defn commit-tx-data
|
263 |
| - [db repo repoid repo-name {:keys [sha msg tree parents author authored committer committed] :as commit}] |
| 309 | + [db repo repoid repo-name {:keys [sha msg tree parents authored committed] :as commit}] |
264 | 310 | (let [tempid? map? ;;todo - better pred
|
265 | 311 | sha->id (index->id-fn db :git/sha)
|
266 |
| - email->id (index->id-fn db :email/address) |
267 | 312 | filename->id (index->id-fn db :file/name)
|
268 |
| - authorid (email->id author) |
269 |
| - committerid (email->id committer) |
270 |
| - cid (d/tempid :db.part/user) |
271 |
| - tx-data (fn f [inpath [sha type filename]] |
272 |
| - (let [path (str inpath filename) |
273 |
| - id (sha->id sha) |
274 |
| - filenameid (filename->id filename) |
275 |
| - pathid (filename->id path) |
276 |
| - nodeid (or (and (not (tempid? id)) |
277 |
| - (not (tempid? filenameid)) |
278 |
| - (ffirst (d/q '[:find ?e :in $ ?filename ?id |
279 |
| - :where [?e :node/filename ?filename] [?e :node/object ?id]] |
280 |
| - db filenameid id))) |
281 |
| - (d/tempid :db.part/user)) |
282 |
| - newpath (or (tempid? pathid) (tempid? nodeid) |
283 |
| - (not (ffirst (d/q '[:find ?node :in $ ?path |
284 |
| - :where [?node :node/paths ?path]] |
285 |
| - db pathid)))) |
286 |
| - data (cond-> [] |
287 |
| - (tempid? filenameid) (conj [:db/add filenameid :file/name filename]) |
288 |
| - (tempid? pathid) (conj [:db/add pathid :file/name path]) |
289 |
| - (tempid? nodeid) (conj {:db/id nodeid :node/filename filenameid :node/object id}) |
290 |
| - newpath (conj [:db/add nodeid :node/paths pathid]) |
291 |
| - (tempid? id) (conj {:db/id id :git/sha sha :git/type type})) |
292 |
| - data (if (and newpath (= type :tree)) |
293 |
| - (let [rev-tree (git/lookup-rev-tree repo sha) |
294 |
| - es (git/shallow-tree-walk repo rev-tree)] |
295 |
| - (reduce (fn [data child] |
296 |
| - (let [[cid cdata] (f (str path "/") child) |
297 |
| - data (into data cdata)] |
298 |
| - (cond-> data |
299 |
| - (tempid? id) (conj [:db/add id :tree/nodes cid])))) |
300 |
| - data es)) |
301 |
| - data)] |
302 |
| - [nodeid data])) |
303 |
| - [treeid treedata] (tx-data nil [tree :tree repo-name]) |
304 |
| - tx (into treedata |
305 |
| - [[:db/add repoid :repo/commits cid] |
306 |
| - {:db/id (d/tempid :db.part/tx) |
307 |
| - :tx/commit cid |
308 |
| - :tx/op :import} |
309 |
| - (cond-> {:db/id cid |
310 |
| - :git/type :commit |
311 |
| - :commit/tree treeid |
312 |
| - :git/sha sha |
313 |
| - :commit/author authorid |
314 |
| - :commit/authoredAt authored |
315 |
| - :commit/committer committerid |
316 |
| - :commit/committedAt committed |
317 |
| - } |
318 |
| - msg (assoc :commit/message msg) |
319 |
| - parents (assoc :commit/parents |
320 |
| - (mapv (fn [p] |
321 |
| - (let [id (sha->id p)] |
322 |
| - (assert (not (tempid? id)) |
323 |
| - (str "Parent " p " not previously imported")) |
324 |
| - id)) |
325 |
| - parents)))]) |
326 |
| - tx (cond-> tx |
327 |
| - (tempid? authorid) |
328 |
| - (conj [:db/add authorid :email/address author]) |
329 |
| - |
330 |
| - (and (not= committer author) (tempid? committerid)) |
331 |
| - (conj [:db/add committerid :email/address committer]))] |
332 |
| - tx)) |
| 313 | + commitid (d/tempid :db.part/user) |
| 314 | + ;;find a node by object and filename id |
| 315 | + check-for-node |
| 316 | + (fn [object-id filename-id] |
| 317 | + (some #(let [{eid :e} %] |
| 318 | + (if (seq (d/datoms db :eavt eid :node/filename filename-id)) |
| 319 | + eid)) |
| 320 | + (d/datoms db :avet :node/object object-id))) |
| 321 | + |
| 322 | + walker-fn |
| 323 | + (fn [{:keys [sha type path filename parent]}] |
| 324 | + (let [;;lookup id for file/tree object by tree sha |
| 325 | + objid (sha->id sha) |
| 326 | + ;;lookup id for file name |
| 327 | + filenameid (filename->id filename) |
| 328 | + ;;lookup id for complete path |
| 329 | + pathid (filename->id path) |
| 330 | + ;;lookup id for tree node |
| 331 | + ;;new if either tree sha or filename are new |
| 332 | + ;;i.e. file with new content |
| 333 | + nodeid (or (and (not (tempid? objid)) |
| 334 | + (not (tempid? filenameid)) |
| 335 | + (check-for-node objid filenameid)) |
| 336 | + (d/tempid :db.part/user)) |
| 337 | + ;;path is new if: 1. path name is new, or 2. path name |
| 338 | + ;;exists but tree node is new (file name and sha are |
| 339 | + ;;unique), or 3. path name and tree node both exist |
| 340 | + ;;but the former is not already a path of the latter. |
| 341 | + newpath (or (tempid? pathid) (tempid? nodeid) |
| 342 | + (every? #(not= nodeid (:e %)) |
| 343 | + (d/datoms db :vaet pathid :node/paths))) |
| 344 | + data (cond-> |
| 345 | + [] |
| 346 | + ;;record file name if new |
| 347 | + (tempid? filenameid) |
| 348 | + (conj [:db/add filenameid :file/name filename]) |
| 349 | + ;;record path name if new |
| 350 | + (tempid? pathid) |
| 351 | + (conj [:db/add pathid :file/name path]) |
| 352 | + ;;record tree node if new |
| 353 | + (tempid? nodeid) |
| 354 | + (conj {:db/id nodeid |
| 355 | + :node/filename filenameid |
| 356 | + :node/object objid}) |
| 357 | + ;;link tree node to new paths |
| 358 | + newpath |
| 359 | + (conj [:db/add nodeid :node/paths pathid]) |
| 360 | + ;;link new tree node to parent |
| 361 | + (and (tempid? nodeid) parent) |
| 362 | + (conj [:db/add parent :tree/nodes nodeid]) |
| 363 | + ;;record the sha of file/tree objects |
| 364 | + (tempid? objid) |
| 365 | + (conj {:db/id objid :git/sha sha :git/type type}))] |
| 366 | + ;;emit the tree node, its object, |
| 367 | + ;;and the accumulated tx data |
| 368 | + ;;indicating if the tree node is new |
| 369 | + {:node-id nodeid |
| 370 | + :object-id objid |
| 371 | + :new-path newpath |
| 372 | + :data data})) |
| 373 | + |
| 374 | + [root-nodeid treedata] (git/deep-tree-walk repo repo-name tree walker-fn)] |
| 375 | + (into treedata |
| 376 | + (commit-node-tx-data db repoid root-nodeid commit)))) |
333 | 377 |
|
334 | 378 |
|
335 | 379 | (defn unimported-commits
|
|
367 | 411 | (doseq [commit commits]
|
368 | 412 | (let [db (d/db conn)]
|
369 | 413 | (println "Importing commit:" (:sha commit))
|
370 |
| - (d/transact conn (commit-tx-data db repo repoid repo-name commit)))) |
| 414 | + @(d/transact conn (commit-tx-data db repo repoid repo-name commit)))) |
371 | 415 | (d/request-index conn)
|
372 | 416 | (println "Import complete!")))
|
373 | 417 |
|
|
0 commit comments