Feature: Add llava support #577
Closed
@@ -0,0 +1,22 @@
using LLama.Grammars;
using LLama.Native;
using System;

namespace LLama.Common
{
    public class LLamaSamplingContext
    {
        public LLamaSamplingParams parameters;

        // mirostat sampler state
        public float mirostat_mu;

        public IntPtr grammar;
        // internal
        public IntPtr parsed_grammar;

        // TODO: replace with ring-buffer
        public LLamaToken[] prev;
        public LLamaTokenData[] cur;
    }
}
@@ -0,0 +1,53 @@
using LLama.Native;
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;

namespace LLama.Common
{
    [StructLayout(LayoutKind.Sequential)]
    public class LLamaSamplingParams
    {
        public int n_prev = 64;                // number of previous tokens to remember
        public int n_probs = 0;                // if greater than 0, output the probabilities of top n_probs tokens
        public int top_k = 40;                 // <= 0 to use vocab size
        public float top_p = 0.95f;            // 1.0 = disabled
        public float min_p = 0.05f;            // 0.0 = disabled
        public float tfs_z = 1.00f;            // 1.0 = disabled
        public float typical_p = 1.00f;        // 1.0 = disabled
        public float temp = 0.70f;             // <= 0.0 to sample greedily, 0.0 to not output probabilities
        public float dynatemp_range = 0.0f;    // 0.0 = disabled
        public float dynatemp_exponent = 1.0f; // controls how entropy maps to temperature in dynamic temperature sampler
        public int penalty_last_n = 64;        // last n tokens to penalize (0 = disable penalty, -1 = context size)
        public float penalty_repeat = 1.10f;   // 1.0 = disabled
        public float penalty_freq = 0.00f;     // 0.0 = disabled
        public float penalty_present = 0.00f;  // 0.0 = disabled
        public int mirostat = 0;               // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
        public float mirostat_tau = 5.00f;     // target entropy
        public float mirostat_eta = 0.10f;     // learning rate
        public bool penalize_nl = true;        // consider newlines as a repeatable token

        public string samplers_sequence = "kfypmt"; // top_k, tail_free, typical_p, top_p, min_p, temp

        public string grammar = string.Empty;  // optional BNF-like grammar to constrain sampling

        // Classifier-Free Guidance
        // https://arxiv.org/abs/2306.17806

        public string cfg_negative_prompt = string.Empty; // string to help guidance
        public float cfg_scale = 1.0f;         // how strong is guidance

        //std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens
        public IntPtr logit_bias;
        public LLamaToken[] penalty_prompt_tokens;
        public bool use_penalty_prompt_tokens = false;
    }

    public struct logit_bias_struct
    {
        public LLamaToken token;
        public float bias;
    }
}
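To show how the two new sampling types are meant to fit together, here is a minimal sketch of initialising an LLamaSamplingContext from an LLamaSamplingParams instance, loosely mirroring llama_sampling_init in llama.cpp's common library. The factory name, buffer sizes and zero/empty defaults below are illustrative assumptions, not part of this PR.

```csharp
// Illustrative sketch only - helper name and defaults are assumptions, not part of this PR.
using System;
using LLama.Common;
using LLama.Native;

public static class SamplingContextFactory
{
    public static LLamaSamplingContext Create(LLamaSamplingParams parameters)
    {
        return new LLamaSamplingContext
        {
            parameters = parameters,
            mirostat_mu = 0.0f,                        // reset mirostat sampler state
            grammar = IntPtr.Zero,                     // no grammar loaded yet
            parsed_grammar = IntPtr.Zero,
            prev = new LLamaToken[parameters.n_prev],  // history of previously sampled tokens
            cur = Array.Empty<LLamaTokenData>(),       // candidate buffer, refilled on each sampling step
        };
    }
}
```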
@@ -0,0 +1,15 @@
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;

namespace LLama.LLava
{
    [StructLayout(LayoutKind.Sequential)]
    public unsafe class LLavaImageEmbed
    {
        public float* embed;
        public int n_image_pos;
    }
}
@@ -0,0 +1,14 @@
using LLama.Native;
using System;
using System.Runtime.InteropServices;

namespace LLama.LLava
{
    [StructLayout(LayoutKind.Sequential)]
    public class LLavaContext
    {
        public IntPtr ClipContext;
        public SafeLLamaContextHandle LLamaContext;
        public SafeLlamaModelHandle model;
    }
}
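As a rough illustration of how this container could be populated, the sketch below loads a CLIP projector through the clip_model_load binding added later in this PR and reuses an existing LLamaSharp model and context. The file paths and variable names are placeholders, not part of this PR.

```csharp
// Illustrative sketch only - paths and names are placeholders, not part of this PR.
using LLama;
using LLama.Common;
using LLama.LLava;
using LLama.Native;

var parameters = new ModelParams("ggml-model-q4_k.gguf");   // hypothetical LLaVA language model path
using var weights = LLamaWeights.LoadFromFile(parameters);
using var context = weights.CreateContext(parameters);

var llava = new LLavaContext
{
    // clip_model_load comes from the NativeApi additions in this PR
    ClipContext  = NativeApi.clip_model_load("mmproj-model-f16.gguf", 1),  // hypothetical projector path
    LLamaContext = context.NativeHandle,
    model        = weights.NativeHandle,
};
```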
@@ -0,0 +1,11 @@
using System;
using System.Collections.Generic;
using System.Text;

namespace LLama
{
    internal class LLavaContext
    {
    }
}
@@ -0,0 +1,47 @@
using LLama.Native;
using Microsoft.Extensions.Logging;
using System.Threading.Tasks;

namespace LLama
{
    public class LLavaInteractExecutor /*: InteractiveExecutor*/
    {
        /// <summary>
        /// weights of the LLava model
        /// </summary>
        protected SafeLlavaModelHandle handle;

        /// <summary>
        ///
        /// </summary>
        /// <param name="context"></param>
        /// <param name="logger"></param>
        //public LLavaInteractExecutor(SafeLlavaModelHandle handle, ILogger? logger = null)
        //{
        //    this.handle = handle;
        //    this.logger = logger;
        //}

        //protected override Task PreprocessInputs(string prompt, byte[] imageByte, InferStateArgs args)
        //{
        //    if (_is_prompt_run)
        //    {
        //        // When running the first input (prompt) in interactive mode, we should process it specially.
        //        _embed_inps = Context.Tokenize(text, true).ToList();
        //    }
        //    else
        //    {
        //        if (!text.EndsWith("\n"))
        //        {
        //            text += "\n";
        //        }
        //        var line_inp = Context.Tokenize(text, false);
        //        _embed_inps.AddRange(line_inp);
        //        args.RemainedTokens -= line_inp.Length;
        //    }

        //    return Task.CompletedTask;
        //}
    }
}
@@ -0,0 +1,12 @@
using System;
using System.Collections.Generic;
using System.Text;

namespace LLama.Native
{
    public enum LLamaVocabType
    {
        LLAMA_VOCAB_TYPE_SPM = 0, // SentencePiece
        LLAMA_VOCAB_TYPE_BPE = 1, // Byte Pair Encoding
    };
}
@@ -1,4 +1,6 @@
using LLama.LLava;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

#pragma warning disable IDE1006 // Naming Styles
@@ -187,6 +189,7 @@ public static void llama_empty_call()
        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
        public static extern unsafe float* llama_get_logits_ith(SafeLLamaContextHandle ctx, int i);


        /// <summary>
        /// Get the embeddings for the input
        /// </summary>
@@ -330,7 +333,7 @@ public static int llama_token_to_piece(SafeLlamaModelHandle model, LLamaToken ll
        /// </summary>
        /// <param name="logCallback"></param>
        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_log_set(LLamaLogCallback logCallback);

        /// <summary>
        /// Clear the KV cache
@@ -438,5 +441,69 @@ public static int llama_token_to_piece(SafeLlamaModelHandle model, LLamaToken ll
        /// <returns></returns>
        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_set_n_threads(SafeLLamaContextHandle ctx, uint n_threads, uint n_threads_batch);

        /// <summary>
        /// Get vocab type from model
        /// </summary>
        /// <param name="model"></param>
        /// <returns></returns>
        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
        public static extern LLamaVocabType llama_vocab_type(SafeLlamaModelHandle model);

        /// <summary>
        ///
        /// </summary>
        /// <param name="tokens"></param>
        /// <param name="n_tokens"></param>
        /// <param name="pos_0"></param>
        /// <param name="seq_id"></param>
        /// <returns></returns>
        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
        public static extern unsafe LLamaNativeBatch llama_batch_get_one(LLamaToken* tokens, int n_tokens, ref int pos_0, int seq_id);

        /// <summary>
        /// Set the Init time to ggml process
        /// </summary>
        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void ggml_time_init();

        /// <summary>
        /// Load clip model
        /// </summary>
        /// <param name="clip_model_path">Clip model path</param>
        /// <param name="verbosity"></param>
        /// <returns></returns>
        [DllImport(llavaLibName, CallingConvention = CallingConvention.Cdecl)]
        public static extern IntPtr clip_model_load(string clip_model_path, int verbosity = 1);

        /// <summary>
        /// Free the clip context
        /// </summary>
        /// <param name="ctx">Clip context</param>
        [DllImport(llavaLibName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void clip_free(IntPtr ctx);

        /// <summary>
        /// Read an image from bytes
        /// </summary>
        /// <param name="ctx_clip"></param>
        /// <param name="n_threads"></param>
        /// <param name="bytes"></param>
        /// <param name="image_bytes_length"></param>
        /// <returns></returns>
        [DllImport(llavaLibName, CallingConvention = CallingConvention.Cdecl)]
        public static extern LLavaImageEmbed llava_image_embed_make_with_bytes(IntPtr ctx_clip, int n_threads, byte[] bytes, int image_bytes_length);

        /// <summary>
        /// Embed an image and get the token length
        /// </summary>
        /// <param name="ctx_llama"></param>
        /// <param name="image_embed"></param>
        /// <param name="n_batch"></param>
        /// <param name="n_past"></param>
        /// <returns></returns>
        [DllImport(llavaLibName, CallingConvention = CallingConvention.Cdecl)]
        public static extern unsafe bool llava_eval_image_embed(SafeLLamaContextHandle ctx_llama, LLavaImageEmbed image_embed, int n_batch, ref int n_past);
    }
}

Review comment on llava_eval_image_embed: Please use a separate file such as
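To show how the new llava bindings fit together end to end, here is a hedged usage sketch following the llava example flow in llama.cpp: load the CLIP model, embed an image from raw bytes, evaluate that embedding into the llama context, then free the CLIP context. The model paths, thread count and batch size are assumptions, and error handling is kept minimal.

```csharp
// Illustrative sketch only - not part of this PR; paths and sizes are placeholders.
using System;
using System.IO;
using LLama;
using LLama.Common;
using LLama.LLava;
using LLama.Native;

NativeApi.ggml_time_init();

var parameters = new ModelParams("llava-v1.5-7b.Q4_K.gguf");   // hypothetical language model path
using var weights = LLamaWeights.LoadFromFile(parameters);
using var context = weights.CreateContext(parameters);

IntPtr clipCtx = NativeApi.clip_model_load("mmproj-model-f16.gguf", 1);  // hypothetical projector path
try
{
    // Turn raw image bytes into an embedding via the CLIP encoder.
    byte[] imageBytes = File.ReadAllBytes("image.jpg");
    LLavaImageEmbed embed = NativeApi.llava_image_embed_make_with_bytes(
        clipCtx, 4, imageBytes, imageBytes.Length);

    // Feed the image embedding into the llama context; n_past advances past the image tokens,
    // so normal text evaluation and sampling can continue from that position.
    int n_past = 0;
    if (!NativeApi.llava_eval_image_embed(context.NativeHandle, embed, 512, ref n_past))
        Console.Error.WriteLine("Failed to evaluate the image embedding.");
}
finally
{
    NativeApi.clip_free(clipCtx);
}
```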
Review comment: It seems it's duplicated with LLava/LLavaContext.